Spaces:

xieqilenb
/

blabla

Running

App Files Community

xieqilenb committed 3 days ago

Commit

7f4a56e

verified ·

1 Parent(s): fd902a3

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -61

app.py CHANGED Viewed

@@ -1,70 +1,74 @@
 import streamlit as st
-from PIL import Image
 from transformers import pipeline
 from gtts import gTTS
-st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
-def generate_caption(image_file):
-    image = Image.open(image_file)
-    caption_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-    caption_results = caption_generator(image)
-    caption = caption_results[0]['generated_text']
-    return caption
-def generate_story(caption):
-    story_generator = pipeline("text-generation", model="Qwen/Qwen2-1.5B")
-    messages = "Please based on following image caption: "+caption+", generate a complete fairy tale story for children with at least 100 words and max 300 words"
-    result = story_generator(messages, max_length=300, num_return_sequences=1)
-    story = result[0]['generated_text']
-    return story
-@st.cache_resource
-# def load_image_generator():
-#     device = "cuda" if torch.cuda.is_available() else "cpu"
-#     torch_dtype = torch.float16 if device == "cuda" else torch.float32
-#     pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
-#     pipe = pipe.to(device)
-#     return pipe
-# def generate_illustration(prompt):
-#     pipe = load_image_generator()
-#     image_result = pipe(prompt)
-#     generated_image = image_result.images[0]
-#     return generated_image
-def text_to_speech(text, output_file="output.mp3"):
     tts = gTTS(text=text, lang="en")
-    tts.save(output_file)
-    return output_file
-def main():
-    st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
-    st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
-    uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
-    if uploaded_file is not None:
-        image = Image.open(uploaded_file)
-        st.image(image, caption="Uploaded image", use_container_width=True)
-        with st.spinner("Image caption being generated..."):
-            caption = generate_caption(uploaded_file)
-        st.write("**Image Caption:**", caption)
-        with st.spinner("Generating story..."):
-            story = generate_story(caption)
-        st.write("**Story:**")
-        st.write(story)
-        # with st.spinner("Generating illustration..."):
-        #     illustration = generate_illustration(story[:200])
-        # st.write("### Story Illustrations：")
-        # st.image(illustration, caption="Story Illustrations", use_container_width=True)
-        with st.spinner("Converting to voice...."):
-            audio_file = text_to_speech(story)
-        st.audio(audio_file, format="audio/mp3")
-if __name__ == "__main__":
-    main()

 import streamlit as st
 from transformers import pipeline
+from PIL import Image
 from gtts import gTTS
+from io import BytesIO
+@st.cache_resource
+def get_image_captioner():
+    """Return the BLIP image-captioning pipeline, building it only once.
+    Without caching, every "Generate Story" click re-creates the pipeline
+    (and reloads the model weights); ``st.cache_resource`` keeps a single
+    shared instance alive across Streamlit reruns and sessions.
+    """
+    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
+@st.cache_resource
+def get_story_generator():
+    """Return the Qwen2-1.5B text-generation pipeline, building it only once.
+    Without caching, every "Generate Story" click re-creates the pipeline
+    (and reloads the model weights); ``st.cache_resource`` keeps a single
+    shared instance alive across Streamlit reruns and sessions.
+    """
+    return pipeline("text-generation", model="Qwen/Qwen2-1.5B")
+def text_to_speech(text):
+    """Synthesize ``text`` as English speech and return it in an in-memory buffer.
+    The audio is written to a ``BytesIO`` object rather than a file on disk,
+    and the buffer is rewound before being returned so it can be read from
+    the start. The caller plays it with ``format="audio/mp3"``, so the
+    payload is presumably MP3 -- confirm against the gTTS documentation.
+    """
     tts = gTTS(text=text, lang="en")
+    audio_bytes = BytesIO()
+    # Stream the synthesized audio into memory instead of saving a file.
+    tts.write_to_fp(audio_bytes)
+    # Rewind: after writing, the position is at the end of the buffer.
+    audio_bytes.seek(0)
+    return audio_bytes
+# Streamlit script body: upload an image, caption it, turn the caption into a
+# children's story, then read the story aloud.
+st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
+st.title("Children's Storytelling App")
+st.write("Upload an image and let the magic create a story, then convert it to audio!")
+# A story shorter than this many words triggers another generation attempt.
+MIN_STORY_WORDS = 100
+# Hard cap on attempts so a model that keeps producing short text cannot hang
+# the app in an endless retry loop; the longest attempt is used as a fallback.
+MAX_STORY_ATTEMPTS = 3
+uploaded_file = st.file_uploader("Select an image...", type=["jpg", "png", "jpeg"])
+if uploaded_file is not None:
+    try:
+        # Normalise to RGB so palette, greyscale and RGBA uploads all reach
+        # the captioner in one consistent mode.
+        image = Image.open(uploaded_file).convert("RGB")
+        st.image(image, caption="Uploaded Image", use_container_width=True)
+        if st.button("Generate Story"):
+            with st.spinner("Generating content..."):
+                captioner = get_image_captioner()
+                caption_result = captioner(image)
+                caption = caption_result[0]["generated_text"]
+                st.subheader("Image Caption")
+                st.write(caption)
+                prompt = (
+                    "You are a creative children's story writer. Based on the following image details, "
+                    "please write an imaginative story for children aged 3-10. Do not simply rephrase the image details; "
+                    "instead, expand creatively by adding fun characters, adventures, and unexpected twists. "
+                    "The story must be at least 100 words long.\n\n"
+                    f"Image Details: {caption}\n\nStory:"
+                )
+                story_generator = get_story_generator()
+                story = ""
+                for _ in range(MAX_STORY_ATTEMPTS):
+                    story_result = story_generator(
+                        prompt,
+                        # Token budgets apply to the continuation only; the
+                        # max_length/min_length variants also count the prompt.
+                        max_new_tokens=300,
+                        min_new_tokens=100,
+                        num_return_sequences=1,
+                        do_sample=True,
+                        top_p=0.95,
+                        top_k=50,
+                        # Return just the generated story so the prompt's own
+                        # words are not counted towards the minimum length.
+                        return_full_text=False,
+                    )
+                    candidate = story_result[0]["generated_text"].strip()
+                    # Keep the longest attempt seen so far as the fallback.
+                    if len(candidate.split()) > len(story.split()):
+                        story = candidate
+                    if len(story.split()) >= MIN_STORY_WORDS:
+                        break
+                if not story:
+                    # Nothing to display or speak; skip text-to-speech.
+                    st.warning("The model did not produce a story. Please try again.")
+                else:
+                    st.subheader("Generated Story")
+                    st.write(story)
+                    audio_bytes = text_to_speech(story)
+                    st.subheader("Listen to the Story")
+                    st.audio(audio_bytes, format="audio/mp3")
+    except Exception as e:
+        # Top-level UI boundary: surface any failure to the user instead of
+        # letting the script crash with a traceback.
+        st.error(f"An error occurred: {e}")