"""Streamlit app: generate Shakespeare-style text from a user prompt, then
render a matching image (FLUX.1-dev) and spoken audio (Bark) of that text."""

import streamlit as st
import torch
from diffusers import FluxPipeline
from transformers import AutoModel, AutoProcessor, pipeline

# System instruction given to the chat model for every generation.
SYSTEM_PROMPT = (
    "You are a chatbot that writes Shakespeare given a prompt, "
    "the text you write should be 25 lines long."
)


@st.cache_resource
def load_text_pipeline():
    """Load the Llama chat pipeline once per server process.

    The original script rebuilt every model on each Streamlit rerun;
    st.cache_resource keeps one shared instance alive instead.
    """
    return pipeline(
        "text-generation",
        model="meta-llama/Llama-3.2-3B-Instruct",
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )


@st.cache_resource
def load_image_pipeline():
    """Load the FLUX text-to-image pipeline once; offload to CPU to save VRAM."""
    flux = FluxPipeline.from_pretrained(
        "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
    )
    flux.enable_model_cpu_offload()
    return flux


@st.cache_resource
def load_tts():
    """Load the Bark text-to-speech processor and model once.

    Returns:
        (processor, model) tuple for suno/bark-small.
    """
    processor = AutoProcessor.from_pretrained("suno/bark-small")
    model = AutoModel.from_pretrained("suno/bark-small")
    return processor, model


def poet(text):
    """Return Shakespeare-style text generated from the user prompt.

    Bug fixes vs. the original:
    - the original printed the result and returned None, so the rest of
      the app received None; we now return the reply text.
    - the original appended to a shared global ``messages`` list, so every
      prior prompt leaked into later generations; we build a fresh
      conversation per call.

    Args:
        text: the user's prompt.

    Returns:
        The assistant's generated text (str).
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": text},
    ]
    outputs = load_text_pipeline()(messages, max_new_tokens=256)
    # The chat pipeline returns the whole conversation; the last message is
    # the assistant reply, whose "content" field holds the generated text.
    return outputs[0]["generated_text"][-1]["content"]


def poet_image(poetry):
    """Render an image illustrating the generated text.

    Args:
        poetry: the Shakespeare-style text to illustrate.

    Returns:
        A PIL image produced by the FLUX pipeline.
    """
    prompt = (
        "Create an image based on the following shakespeare like text: " + poetry
    )
    return load_image_pipeline()(
        prompt,
        height=1024,
        width=1024,
        guidance_scale=3.5,
        num_inference_steps=50,
        max_sequence_length=512,
        # Fixed CPU seed so the same poem always yields the same image.
        generator=torch.Generator("cpu").manual_seed(0),
    ).images[0]


def poet_bard(poetry):
    """Synthesize speech for the generated text with Bark.

    The original wrapped the waveform in ``IPython.display.Audio``, which
    Streamlit's ``st.audio`` cannot render; return raw samples instead.

    Args:
        poetry: the text to speak.

    Returns:
        (samples, sampling_rate): a 1-D numpy float array and its rate,
        suitable for ``st.audio(samples, sample_rate=sampling_rate)``.
    """
    processor, model = load_tts()
    inputs = processor(text=[poetry], return_tensors="pt")
    speech_values = model.generate(**inputs, do_sample=True)
    return (
        speech_values.cpu().numpy().squeeze(),
        model.generation_config.sample_rate,
    )


st.title("Shakespeare Ai")
st.write("A space made to allow people to create shakespeare like text with images!")

# get prompt
prompt = st.text_input("Enter your prompt: ")

# Only run the expensive models after an explicit click with a non-empty
# prompt. The original discarded st.button's return value, so generation
# ran on every script rerun — even with an empty prompt.
if st.button("Generate Shakespeare") and prompt:
    shakespeare = poet(prompt)
    samples, rate = poet_bard(shakespeare)
    img = poet_image(shakespeare)

    # write content
    st.write(shakespeare)
    st.audio(samples, sample_rate=rate)
    st.image(img)