Spaces:
Running
Running
File size: 2,206 Bytes
885aabb 599621d 86f6523 599621d 3f152b0 885aabb 86f6523 3f152b0 86f6523 885aabb 3f152b0 885aabb b352af0 885aabb d8861bf 885aabb 3f152b0 885aabb 3f152b0 885aabb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import streamlit as st
from transformers import pipeline
from gtts import gTTS
import os
from PIL import Image
# Load models
@st.cache_resource
def load_models():
    """Create the captioning and story-generation pipelines, cached by Streamlit.

    Streamlit re-executes the whole script on every user interaction, so an
    uncached loader would rebuild both Hugging Face pipelines on each rerun.
    ``st.cache_resource`` keeps the pipeline objects alive and returns the same
    instances on later calls instead of constructing them again.

    Returns:
        tuple: ``(image_to_text, storyteller)`` where ``image_to_text`` is the
        "image-to-text" pipeline for ``Salesforce/blip-image-captioning-base``
        and ``storyteller`` is the "text-generation" pipeline for
        ``pranavpsv/gpt2-genre-story-generator``.
    """
    image_to_text = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    storyteller = pipeline(
        "text-generation",
        model="pranavpsv/gpt2-genre-story-generator",
        temperature=0.75,
        max_length=100,
    )
    return image_to_text, storyteller
# Process image to text
def generate_caption(image, image_to_text):
    """Return the caption that ``image_to_text`` produces for ``image``.

    Args:
        image: Input passed unchanged to ``image_to_text``.
        image_to_text: Callable returning a sequence of dicts that each carry
            a ``"generated_text"`` entry.

    Returns:
        The ``"generated_text"`` of the first result, or the fallback string
        ``"No caption generated."`` when the callable returns an empty/falsy
        result.
    """
    predictions = image_to_text(image)
    if not predictions:
        return "No caption generated."
    return predictions[0]["generated_text"]
# Generate a narrative story using the GPT-2 genre-based story generator
def generate_story(text, storyteller):
    """Generate a superhero-genre story seeded with ``text``.

    Args:
        text: Seed text (the image caption) appended after the genre tags.
        storyteller: Callable invoked as
            ``storyteller(prompt, max_length=100, num_return_sequences=1)``
            and returning a sequence of dicts with a ``"generated_text"`` key.

    Returns:
        The first generated text with every ``"<BOS> <superhero>"`` occurrence
        removed and surrounding whitespace stripped, or
        ``"No story generated."`` if nothing remains.
    """
    genre_prefix = "<BOS> <superhero>"
    outputs = storyteller(
        f"{genre_prefix} {text}",
        max_length=100,
        num_return_sequences=1,
    )
    # The model echoes the control tags back; drop them before display.
    cleaned = outputs[0]["generated_text"].replace(genre_prefix, "").strip()
    return cleaned or "No story generated."
# Convert text to speech
def text_to_speech(text, filename="output.mp3"):
    """Synthesize ``text`` with gTTS and save the audio to ``filename``.

    Args:
        text: Text to be spoken.
        filename: Destination path for the saved audio; defaults to
            ``"output.mp3"``.

    Returns:
        The ``filename`` that was written, so callers can hand it to a player.
    """
    gTTS(text).save(filename)
    return filename
# Main Streamlit app
def main():
    """Render the Streamlit page: upload an image, caption it, narrate a story.

    The flow is: show the title, obtain the two model pipelines, wait for an
    uploaded image, then display in order the image, its caption, a generated
    story, and an audio player for the spoken story.
    """
    st.title("AI-Powered Image Captioning and Storytelling")
    captioner, story_model = load_models()

    upload = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
    if upload is None:
        # Nothing to process until the user provides a file.
        return

    # Decode the uploaded file into a PIL image for display and captioning.
    picture = Image.open(upload)
    st.image(picture, caption="Uploaded Image", use_container_width=True)

    with st.spinner("Generating caption..."):
        caption_text = generate_caption(picture, captioner)
    st.write("### Image Caption:")
    st.write(caption_text)

    with st.spinner("Generating story..."):
        story_text = generate_story(caption_text, story_model)
    st.write("### Generated Story:")
    st.write(story_text)

    with st.spinner("Generating speech..."):
        audio_path = text_to_speech(story_text)
    st.audio(audio_path, format="audio/mp3")


# Script entry point: only launch the app when executed directly.
if __name__ == "__main__":
    main()
|