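# Streamlit app: image captioning, story generation, and text-to-speech.
# (Hosting-page status banner at capture time: "Spaces: Running".)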
import os
import tempfile

import streamlit as st
from gtts import gTTS
from PIL import Image
from transformers import pipeline
# Load the image-captioning and story-generation models
def _build_models():
    """Construct the captioning and story-generation pipelines.

    Returns:
        A ``(image_to_text, storyteller)`` tuple of ``transformers`` pipelines.
    """
    image_to_text = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    storyteller = pipeline(
        "text-generation",
        model="pranavpsv/gpt2-genre-story-generator",
        temperature=0.75,
        max_length=100,
    )
    return image_to_text, storyteller


def load_models():
    """Return the captioning and storytelling pipelines, loading them once.

    Streamlit re-executes the script on every user interaction, so building
    the pipelines directly here would reload both models on each rerun.
    ``st.cache_resource`` keeps the loaded pipelines alive across reruns.

    Returns:
        A ``(image_to_text, storyteller)`` tuple of ``transformers`` pipelines.
    """
    # The cache wrapper is applied at call time so that merely defining this
    # function has no Streamlit side effects.
    return st.cache_resource(_build_models)()
# Turn an image into a text caption
def generate_caption(image, image_to_text):
    """Caption *image* using the supplied image-to-text callable.

    Args:
        image: The image to describe; passed to ``image_to_text`` unchanged.
        image_to_text: Callable returning a sequence of dicts that carry the
            caption under the ``"generated_text"`` key.

    Returns:
        The caption with surrounding whitespace removed, or
        ``"No caption generated."`` when the callable returns nothing or the
        caption is empty or whitespace-only.
    """
    fallback = "No caption generated."
    result = image_to_text(image)
    if not result:
        return fallback
    # Treat a missing, empty, or whitespace-only caption like "no result" so
    # callers never receive a blank string.
    caption = (result[0].get("generated_text") or "").strip()
    return caption or fallback
# Generate a narrative story with the GPT-2 genre-based story generator
def generate_story(text, storyteller, genre="superhero", max_length=100):
    """Generate a short story that continues *text* in the given genre.

    Args:
        text: Seed text for the story (here, the image caption).
        storyteller: Text-generation callable; called with the prompt plus
            ``max_length`` and ``num_return_sequences`` keyword arguments and
            expected to return a sequence of dicts with ``"generated_text"``.
        genre: Genre tag placed in the prompt as ``<genre>``. Defaults to
            ``"superhero"``. NOTE(review): the set of tags the model accepts
            is defined by its model card - confirm before exposing others.
        max_length: Length limit forwarded to ``storyteller``.

    Returns:
        The generated text with the ``<BOS> <genre>`` control tag removed and
        whitespace stripped, or ``"No story generated."`` when nothing usable
        comes back.
    """
    fallback = "No story generated."
    control_tag = f"<BOS> <{genre}>"
    outputs = storyteller(
        f"{control_tag} {text}",
        max_length=max_length,
        num_return_sequences=1,
    )
    # An empty result would otherwise raise IndexError on outputs[0].
    if not outputs:
        return fallback
    # The output echoes the prompt, so remove the control tag from it.
    story = outputs[0]["generated_text"].replace(control_tag, "").strip()
    return story or fallback
# Convert text to speech and save it as an audio file
def text_to_speech(text, filename="output.mp3"):
    """Synthesize *text* with gTTS and write the audio to *filename*.

    Args:
        text: The text to speak.
        filename: Destination path for the audio; defaults to
            ``"output.mp3"``.

    Returns:
        The path that was written, so callers can hand it to a player.
    """
    gTTS(text).save(filename)
    return filename
# Main Streamlit app: upload -> caption -> story -> speech
def main():
    """Run the Streamlit page: upload an image, caption it, narrate a story.

    The page shows the uploaded image, its generated caption, a story seeded
    by that caption, and an audio player with the story read aloud.
    """
    st.title("AI-Powered Image Captioning and Storytelling")
    image_to_text, storyteller = load_models()

    uploaded_file = st.file_uploader(
        "Upload an image...", type=["jpg", "png", "jpeg"]
    )
    if uploaded_file is None:
        # Nothing to process until the user picks a file.
        return

    # Convert the uploaded file to a PIL image.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_container_width=True)

    with st.spinner("Generating caption..."):
        caption = generate_caption(image, image_to_text)
    st.write("### Image Caption:")
    st.write(caption)

    with st.spinner("Generating story..."):
        story = generate_story(caption, storyteller)
    st.write("### Generated Story:")
    st.write(story)

    with st.spinner("Generating speech..."):
        # Use a unique temp file instead of a shared "output.mp3": concurrent
        # sessions would otherwise overwrite each other's audio.
        fd, audio_path = tempfile.mkstemp(suffix=".mp3")
        os.close(fd)  # text_to_speech reopens the path by name.
        try:
            text_to_speech(story, audio_path)
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
        finally:
            # The player gets the bytes, so the file is no longer needed.
            os.remove(audio_path)
    st.audio(audio_bytes, format="audio/mp3")
# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()