Spaces:

xieqilenb
/

blabla

Running

App Files Files Community

xieqilenb committed 3 days ago

Commit

ab58353

verified ·

1 Parent(s): 50d76c0

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -40

app.py CHANGED Viewed

@@ -6,56 +6,46 @@ import torch
 st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
-# 判断是否有可用的 GPU，如果有则使用 GPU（device=0），否则使用 CPU（device=-1）
-device_id = 0 if torch.cuda.is_available() else -1
 def generate_caption(image_file):
     image = Image.open(image_file)
-    # 使用 GPU 进行图像描述生成，如果可用
     caption_generator = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
-        device=device_id
     )
     caption_results = caption_generator(image)
     caption = caption_results[0]['generated_text']
     return caption
 def generate_story(caption):
-    # 使用 GPU 进行文本生成操作
     story_generator = pipeline(
         "text-generation",
         model="Qwen/Qwen2-1.5B",
-        device=device_id
     )
-    messages = (
-                    "You are a creative children's story writer. Based on the following image details, "
-                    "please write an imaginative story for children aged 3-10. Do not simply rephrase the image details; "
-                    "instead, expand creatively by adding fun characters, adventures, and unexpected twists. "
-                    "The story must be at least 100 words long.\n\n"
-                    f"Image Details: {caption}\n\nStory:"
-                )
-    result = story_generator(messages, max_length=300, num_return_sequences=1)
-    story = result[0]['generated_text']
     return story
-000000000
-# 以下部分为生成插图示例代码，已注释。如果需要使用 GPU，请取消注释并确保 diffusers 相关依赖已经安装
-# @st.cache_resource
-# def load_image_generator():
-#     from diffusers import DiffusionPipeline
-#     device = "cuda" if torch.cuda.is_available() else "cpu"
-#     torch_dtype = torch.float16 if device == "cuda" else torch.float32
-#     pipe = DiffusionPipeline.from_pretrained(
-#         "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch_dtype
-#     )
-#     pipe = pipe.to(device)
-#     return pipe
-#
-# def generate_illustration(prompt):
-#     pipe = load_image_generator()
-#     image_result = pipe(prompt)
-#     generated_image = image_result.images[0]
-#     return generated_image
 def text_to_speech(text, output_file="output.mp3"):
     tts = gTTS(text=text, lang="en")
@@ -80,13 +70,6 @@ def main():
             story = generate_story(caption)
         st.write("**Story:**")
         st.write(story)
-        # 如果需要生成插图，请取消以下代码的注释
-        # with st.spinner("Generating illustration..."):
-        #     illustration = generate_illustration(story[:200])
-        # st.write("### Story Illustrations:")
-        # st.image(illustration, caption="Story Illustrations", use_container_width=True)
         with st.spinner("Converting to voice..."):
             audio_file = text_to_speech(story)
         st.audio(audio_file, format="audio/mp3")

 # Set the Streamlit page title and page icon for this app.
 st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 def generate_caption(image_file):
     """Return a text caption for the given image.

     Opens ``image_file`` with PIL, runs it through the
     ``Salesforce/blip-image-captioning-base`` image-to-text pipeline, and
     returns the ``generated_text`` of the first result.

     Args:
         image_file: Anything ``Image.open`` accepts (path or file-like object).

     Returns:
         The caption string produced by the model.
     """
     picture = Image.open(image_file)
     captioner = pipeline(
         "image-to-text",
         model="Salesforce/blip-image-captioning-base",
     )
     # The pipeline returns a list of result dicts; only the first is used.
     first_result = captioner(picture)[0]
     return first_result['generated_text']
 def generate_story(caption):
     """Generate a short children's story from an image caption.

     Builds an instruction prompt around ``caption``, runs it through the
     ``Qwen/Qwen2-1.5B`` text-generation pipeline, and returns only the text
     the model produced after the prompt.

     Args:
         caption: Text describing the image; it is interpolated into the prompt.

     Returns:
         The story as a string. A story longer than 300 words is cut to its
         first 300 words (whitespace is collapsed to single spaces in that
         case); a story shorter than 100 words gets an explanatory note
         appended.
     """
     story_generator = pipeline(
         "text-generation",
         model="Qwen/Qwen2-1.5B",
     )
     prompt = (
         "You are a talented children's story writer renowned for your creativity and captivating narratives. "
         "Using the image details provided below, please compose an enchanting tale tailored for children aged 3 to 10. "
         "Rather than simply rephrasing the image details, enrich your story with imaginative characters, quirky adventures, "
         "and delightful surprises. Let your narrative flow naturally and spark wonder in your young audience. "
         "Please ensure that your story is engaging, coherent, and falls between 100 and 300 words in length.\n\n"
         f"Image Details: {caption}\n\nStory:"
     )
     # max_length would count the prompt's tokens too, leaving too little room
     # for the requested 100-300 words, so budget the generated tokens only.
     result = story_generator(prompt, max_new_tokens=400, num_return_sequences=1)
     full_text = result[0]['generated_text']
     if full_text.startswith(prompt):
         # Drop the exact prompt prefix; splitting on "Story:" alone would cut
         # in the wrong place when the caption itself contains that marker.
         story = full_text[len(prompt):].strip()
     elif "Story:" in full_text:
         # Fallback for output that does not echo the prompt verbatim.
         story = full_text.split("Story:", 1)[1].strip()
     else:
         story = full_text.strip()
     words = story.split()
     if len(words) > 300:
         story = " ".join(words[:300])
     elif len(words) < 100:
         story += "\n\n(Note: The generated story is shorter than the desired 100 words.)"
     return story
 def text_to_speech(text, output_file="output.mp3"):
     tts = gTTS(text=text, lang="en")
             story = generate_story(caption)
         st.write("**Story:**")
         st.write(story)
         with st.spinner("Converting to voice..."):
             audio_file = text_to_speech(story)
         st.audio(audio_file, format="audio/mp3")