Spaces:

xieqilenb
/

blabla

Running

App Files Files Community

xieqilenb committed on 6 days ago

Commit

342ef7b

verified ·

1 Parent(s): 7577927

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -13

app.py CHANGED Viewed

@@ -3,6 +3,8 @@ from PIL import Image
 from transformers import pipeline
 from gtts import gTTS
 def generate_caption(image_file):
     image = Image.open(image_file)
@@ -11,23 +13,22 @@ def generate_caption(image_file):
     caption = caption_results[0]['generated_text']
     return caption
 def generate_story(prompt):
     """Generate a story from ``prompt`` with the GPT-2 text-generation pipeline.

     A first sample is drawn with ``max_length=300``. If it contains fewer
     than 100 whitespace-separated words, a second sample is drawn from the
     same prompt with ``max_length=350`` and only its newly generated text is
     appended.

     Args:
         prompt: Seed text handed to the model.

     Returns:
         The story as a single string. The first sample is the pipeline's
         ``generated_text`` as returned by default.
     """
     story_generator = pipeline("text-generation", model="gpt2")
     result = story_generator(prompt, max_length=300, num_return_sequences=1)
     story = result[0]['generated_text']
     if len(story.split()) < 100:
         # return_full_text=False drops the echoed prompt from the second
         # sample; otherwise the prompt would be repeated in the middle of
         # the story when the two samples are joined.
         additional = story_generator(
             prompt,
             max_length=350,
             num_return_sequences=1,
             return_full_text=False,
         )[0]['generated_text'].strip()
         if additional:
             story += " " + additional
     return story
 # ----------------------------
-# generate_illustration
 # ----------------------------
 @st.cache_resource
 # def load_image_generator():
 #     device = "cuda" if torch.cuda.is_available() else "cpu"
 #     torch_dtype = torch.float16 if device == "cuda" else torch.float32
 #     pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
@@ -40,38 +41,40 @@ def generate_story(prompt):
 #     generated_image = image_result.images[0]
 #     return generated_image
 def text_to_speech(text, output_file="output.mp3"):
     """Convert ``text`` to English speech with gTTS and save it as an audio file.

     Args:
         text: The text to be spoken.
         output_file: Path the audio is saved to. With the default, every
             call writes to the same ``output.mp3``.

     Returns:
         ``output_file``, unchanged, so the caller can hand it to a player.
     """
     tts = gTTS(text=text, lang="en")
     tts.save(output_file)
     return output_file
 def main():
     """Render the page: upload an image, caption it, write a story, play it as audio."""
-    st.title("Storytelling App")
-    st.write("Upload a image and we will generate an interesting story based on the picture and convert it into a voice playback!")
     uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
     # Nothing below runs until the user has picked a file.
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded image", use_column_width=True)
-        with st.spinner("Image caption being generated..."):
             caption = generate_caption(uploaded_file)
-        st.write("Image Caption：", caption)
         with st.spinner("Generating story..."):
-            story = generate_story(caption)
-        st.write("Story：")
         st.write(story)
         # Illustration step is disabled; generate_illustration is commented out in this file.
         # with st.spinner("Generating illustration..."):
         #     illustration = generate_illustration(story[:200])
-        # st.write("### Story Illustrations：")
         # st.image(illustration, caption="Story Illustrations", use_column_width=True)
-        with st.spinner("Converting to voice...."):
             audio_file = text_to_speech(story)
         st.audio(audio_file, format="audio/mp3")

 from transformers import pipeline
 from gtts import gTTS
+# Set the basic page configuration: title, tab label, and icon
+st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 def generate_caption(image_file):
     image = Image.open(image_file)
     caption = caption_results[0]['generated_text']
     return caption
 def generate_story(prompt):
     story_generator = pipeline("text-generation", model="gpt2")
     result = story_generator(prompt, max_length=300, num_return_sequences=1)
     story = result[0]['generated_text']
+    # 如果生成的故事长度较短，则额外生成一部分内容
     if len(story.split()) < 100:
         additional = story_generator(prompt, max_length=350, num_return_sequences=1)[0]['generated_text']
         story += " " + additional
     return story
 # ----------------------------
+# generate_illustration (暂时注释掉，如果需要启用请解除注释)
 # ----------------------------
 @st.cache_resource
 # def load_image_generator():
 #     device = "cuda" if torch.cuda.is_available() else "cpu"
 #     torch_dtype = torch.float16 if device == "cuda" else torch.float32
 #     pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
 #     generated_image = image_result.images[0]
 #     return generated_image
 def text_to_speech(text, output_file="output.mp3"):
     """Convert ``text`` to English speech with gTTS and save it as an audio file.

     Args:
         text: The text to be spoken.
         output_file: Path the audio is saved to. With the default, every
             call writes to the same ``output.mp3``.

     Returns:
         ``output_file``, unchanged, so the caller can hand it to a player.
     """
     tts = gTTS(text=text, lang="en")
     tts.save(output_file)
     return output_file
 def main():
     """Render the page: upload an image, caption it, write a story, play it as audio."""
+    # Show the title centered, with an icon
+    st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
+    st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
+    # Image upload
     uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
     # Nothing below runs until the user has picked a file.
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded image", use_column_width=True)
+        with st.spinner("Generating image caption..."):
             caption = generate_caption(uploaded_file)
+        st.write("**Image Caption:**", caption)
         with st.spinner("Generating story..."):
+            story_prompt = f"Please generate a children's story based on this description: {caption}"
+            story = generate_story(story_prompt)
+        st.write("**Story:**")
         st.write(story)
+        # The code below generates a story illustration (requires enabling the corresponding model support)
         # with st.spinner("Generating illustration..."):
         #     illustration = generate_illustration(story[:200])
+        # st.write("### Story Illustrations:")
         # st.image(illustration, caption="Story Illustrations", use_column_width=True)
+        with st.spinner("Converting text to voice..."):
             audio_file = text_to_speech(story)
         st.audio(audio_file, format="audio/mp3")