Spaces:

xieqilenb
/

blabla

Running

App Files Community

xieqilenb committed 3 days ago

Commit

06bce28

verified ·

1 Parent(s): 7f4a56e

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -65

app.py CHANGED Viewed

@@ -1,74 +1,92 @@
 import streamlit as st
-from transformers import pipeline
 from PIL import Image
 from gtts import gTTS
-from io import BytesIO
-def get_image_captioner():
-    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
-def get_story_generator():
-    return pipeline("text-generation", model="Qwen/Qwen2-1.5B")
-def text_to_speech(text):
-    tts = gTTS(text=text, lang="en")
-    audio_bytes = BytesIO()
-    tts.write_to_fp(audio_bytes)
-    audio_bytes.seek(0)
-    return audio_bytes
-st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
-st.title("Children's Storytelling App")
-st.write("Upload an image and let the magic create a story, then convert it to audio!")
-uploaded_file = st.file_uploader("Select an image...", type=["jpg", "png", "jpeg"])
-if uploaded_file is not None:
-    try:
-        image = Image.open(uploaded_file).convert("RGB")
-        st.image(image, caption="Uploaded Image", use_container_width=True)
-        if st.button("Generate Story"):
-            with st.spinner("Generating content..."):
-                captioner = get_image_captioner()
-                caption_result = captioner(image)
-                caption = caption_result[0]["generated_text"]
-                st.subheader("Image Caption")
-                st.write(caption)
-                prompt = (
-                    "You are a creative children's story writer. Based on the following image details, "
-                    "please write an imaginative story for children aged 3-10. Do not simply rephrase the image details; "
-                    "instead, expand creatively by adding fun characters, adventures, and unexpected twists. "
-                    "The story must be at least 100 words long.\n\n"
-                    f"Image Details: {caption}\n\nStory:"
-                )
-                story_generator = get_story_generator()
-                story_result = story_generator(
-                    prompt,
-                    max_length=300,
-                    min_length=100,
-                    num_return_sequences=1,
-                    do_sample=True,
-                    top_p=0.95,
-                    top_k=50
-                )
-                story = story_result[0]["generated_text"]
-                while len(story.split()) < 100:
-                    story_result = story_generator(
-                        prompt,
-                        max_length=300,
-                        min_length=100,
-                        num_return_sequences=1,
-                        do_sample=True,
-                        top_p=0.95,
-                        top_k=50
-                    )
-                    story = story_result[0]["generated_text"]
-                if "Story:" in story:
-                    story = story.split("Story:", 1)[-1].strip()
-                st.subheader("Generated Story")
-                st.write(story)
-                audio_bytes = text_to_speech(story)
-                st.subheader("Listen to the Story")
-                st.audio(audio_bytes, format="audio/mp3")
-    except Exception as e:
-        st.error(f"An error occurred: {e}")

 import streamlit as st
 from PIL import Image
+from transformers import pipeline
 from gtts import gTTS
+import torch
+st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
+# Use the GPU (device=0) when one is available; otherwise fall back to the CPU (device=-1).
+device_id = 0 if torch.cuda.is_available() else -1
+@st.cache_resource
+def _load_caption_generator():
+    """Build the BLIP image-captioning pipeline once and reuse it afterwards."""
+    # Streamlit re-executes the whole script on every interaction; without
+    # caching, the model would be re-created for every uploaded image.
+    return pipeline(
+        "image-to-text",
+        model="Salesforce/blip-image-captioning-base",
+        device=device_id,
+    )
+def generate_caption(image_file):
+    """Return a one-sentence caption describing an image.
+
+    Args:
+        image_file: Anything ``PIL.Image.open`` accepts (a path or a binary
+            file-like object such as a Streamlit upload).
+
+    Returns:
+        The ``generated_text`` of the first caption produced by the model.
+    """
+    image = Image.open(image_file)
+    # Runs on the GPU when available (see device_id), otherwise on the CPU.
+    caption_results = _load_caption_generator()(image)
+    return caption_results[0]['generated_text']
+@st.cache_resource
+def _load_story_generator():
+    """Build the Qwen2 text-generation pipeline once and reuse it afterwards."""
+    # Runs on the GPU when available (see device_id), otherwise on the CPU.
+    return pipeline("text-generation", model="Qwen/Qwen2-1.5B", device=device_id)
+def generate_story(caption):
+    """Generate a children's story from an image caption.
+
+    Args:
+        caption: Text describing the image; it is embedded in the prompt.
+
+    Returns:
+        The text produced by the model, without the prompt and with
+        surrounding whitespace removed.
+    """
+    prompt = (
+        "Please based on following image caption: " + caption +
+        ", generate a complete fairy tale story for children with at least 100 words and max 300 words"
+    )
+    result = _load_story_generator()(
+        prompt,
+        # max_length would count the prompt tokens too and shrink the story;
+        # max_new_tokens budgets only the generated continuation.
+        max_new_tokens=300,
+        num_return_sequences=1,
+        # Without this the pipeline returns prompt + continuation, so the
+        # instruction sentence would be shown and read aloud as the story.
+        return_full_text=False,
+    )
+    return result[0]['generated_text'].strip()
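+# The section below is example code for generating illustrations; it is commented out. If you need to use the GPU, uncomment it and make sure the diffusers dependencies are installed.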
+# @st.cache_resource
+# def load_image_generator():
+#     from diffusers import DiffusionPipeline
+#     device = "cuda" if torch.cuda.is_available() else "cpu"
+#     torch_dtype = torch.float16 if device == "cuda" else torch.float32
+#     pipe = DiffusionPipeline.from_pretrained(
+#         "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch_dtype
+#     )
+#     pipe = pipe.to(device)
+#     return pipe
+#
+# def generate_illustration(prompt):
+#     pipe = load_image_generator()
+#     image_result = pipe(prompt)
+#     generated_image = image_result.images[0]
+#     return generated_image
+def text_to_speech(text, output_file="output.mp3"):
+    """Synthesize English speech for ``text`` and save it as an MP3 file.
+
+    Args:
+        text: The text to read aloud.
+        output_file: Path the MP3 is written to.
+
+    Returns:
+        ``output_file``, so the caller can hand the path to an audio player.
+    """
+    gTTS(text=text, lang="en").save(output_file)
+    return output_file
+def main():
+    """Render the page: upload an image, caption it, write a story, play it as audio."""
+    st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
+    st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
+    uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
+    if uploaded_file is None:
+        return
+    # Any stage below can fail (unreadable image, model loading, the
+    # text-to-speech request). Report the problem on the page instead of
+    # letting the app end in a raw traceback.
+    try:
+        image = Image.open(uploaded_file)
+        st.image(image, caption="Uploaded image", use_container_width=True)
+        with st.spinner("Image caption being generated..."):
+            caption = generate_caption(uploaded_file)
+        st.write("**Image Caption:**", caption)
+        with st.spinner("Generating story..."):
+            story = generate_story(caption)
+        st.write("**Story:**")
+        st.write(story)
+        # To also generate an illustration, uncomment the following lines.
+        # with st.spinner("Generating illustration..."):
+        #     illustration = generate_illustration(story[:200])
+        # st.write("### Story Illustrations:")
+        # st.image(illustration, caption="Story Illustrations", use_container_width=True)
+        if not story.strip():
+            # There is nothing to synthesize, so skip the speech step.
+            st.warning("The generated story is empty, so there is nothing to read aloud.")
+            return
+        with st.spinner("Converting to voice..."):
+            audio_file = text_to_speech(story)
+        st.audio(audio_file, format="audio/mp3")
+    except Exception as e:
+        st.error(f"An error occurred: {e}")
+# Start the app only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    main()