shakespeare / app.py
drod75's picture
Update app.py
35580ab verified
raw
history blame
2 kB
import streamlit as st
from transformers import pipeline, AutoProcessor, AutoModel
import torch
from diffusers import FluxPipeline
from IPython.display import Audio
# --- Model setup: runs once at module import; downloads weights on first run ---

# llama
# Chat model used by poet() to turn the user's prompt into Shakespeare-style text.
model_id = "meta-llama/Llama-3.2-3B-Instruct"
pipe = pipeline(
"text-generation",
model=model_id,
torch_dtype=torch.bfloat16,
device_map="auto",
)
# Shared conversation history seeded with the system instruction.
# NOTE(review): poet() appends to this module-level list, so it grows
# across calls/reruns — verify that is intended.
messages = [
{"role": "system", "content": "You are a chatbot that writes Shakespeare given a prompt, the text you write should be 25 lines long."},
]
# blackforest
# Diffusion pipeline used by poet_image(); CPU offload trades speed for lower GPU memory.
flux = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
flux.enable_model_cpu_offload()
#suno
# Bark text-to-speech processor + model used by poet_bard().
processor = AutoProcessor.from_pretrained("suno/bark-small")
vc = AutoModel.from_pretrained("suno/bark-small")
def poet(text):
    """Generate Shakespeare-style text for *text* and return it as a string.

    Appends the user turn to the shared ``messages`` history, runs the
    chat text-generation pipeline, and returns the assistant's reply.

    Bug fixed: the original only ``print``-ed the last generated message
    and implicitly returned ``None``, so the callers below
    (``poet_bard``, ``poet_image``, ``st.write``) all received ``None``.
    """
    messages.append({"role": "user", "content": text})
    outputs = pipe(
        messages,
        max_new_tokens=256,
    )
    # A chat-style call returns the whole conversation in
    # "generated_text"; the last entry is the new assistant message dict.
    reply = outputs[0]["generated_text"][-1]["content"]
    # Record the assistant turn so follow-up calls see a consistent history.
    messages.append({"role": "assistant", "content": reply})
    return reply
def poet_image(poetry):
    """Render a deterministic 1024x1024 illustration of *poetry* via FLUX.1-dev.

    The text is prefixed with a short instruction so the diffusion model
    treats it as a scene description; a fixed CPU seed keeps the output
    reproducible for the same input.
    """
    instruction = (
        'Create an image based on the following shakespeare like text: '
        + poetry
    )
    result = flux(
        instruction,
        height=1024,
        width=1024,
        guidance_scale=3.5,
        num_inference_steps=50,
        max_sequence_length=512,
        generator=torch.Generator("cpu").manual_seed(0),
    )
    return result.images[0]
def poet_bard(poetry):
    """Synthesize speech for *poetry* with Bark.

    Tokenizes the text, samples a waveform from the model, and wraps the
    squeezed numpy array in an ``IPython.display.Audio`` object at the
    model's configured sampling rate.
    NOTE(review): the caller hands this object to ``st.audio``, which
    documents bytes/ndarray/URL inputs — confirm playback works in
    Streamlit with an IPython display object.
    """
    model_inputs = processor(
        text=[poetry],
        return_tensors="pt",
    )
    waveform = vc.generate(**model_inputs, do_sample=True)
    rate = vc.generation_config.sample_rate
    samples = waveform.cpu().numpy().squeeze()
    return Audio(samples, rate=rate)
st.title("Shakespeare Ai")
st.write("A space made to allow people to create shakespeare like text with images!")
# get prompt
prompt = st.text_input("Enter your prompt: ")
# Bug fixed: the button's return value was discarded, so the (very slow)
# text/image/audio generation ran on EVERY Streamlit rerun — including the
# initial page load with an empty prompt. Gate all work behind the click
# and a non-empty prompt.
if st.button("Generate Shakespeare") and prompt:
    # analyze prompt
    shakespeare = poet(prompt)
    bard = poet_bard(shakespeare)
    img = poet_image(shakespeare)
    # write content
    st.write(shakespeare)
    # NOTE(review): poet_bard returns an IPython Audio object, while
    # st.audio documents bytes/ndarray/URL inputs — verify playback.
    st.audio(bard)
    st.image(img)