Spaces:

xieqilenb
/

blabla

Running

App Files Community

xieqilenb committed 6 days ago

Commit

b55983c

verified ·

1 Parent(s): 342ef7b

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -9

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ from PIL import Image
 from transformers import pipeline
 from gtts import gTTS
-# 设置页面基本配置：标题、标签和图标
 st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 def generate_caption(image_file):
@@ -13,22 +12,23 @@ def generate_caption(image_file):
     caption = caption_results[0]['generated_text']
     return caption
 def generate_story(prompt):
     story_generator = pipeline("text-generation", model="gpt2")
     result = story_generator(prompt, max_length=300, num_return_sequences=1)
     story = result[0]['generated_text']
-    # 如果生成的故事长度较短，则额外生成一部分内容
     if len(story.split()) < 100:
         additional = story_generator(prompt, max_length=350, num_return_sequences=1)[0]['generated_text']
         story += " " + additional
     return story
 # ----------------------------
-# generate_illustration (暂时注释掉，如果需要启用请解除注释)
 # ----------------------------
 @st.cache_resource
 # def load_image_generator():
 #     device = "cuda" if torch.cuda.is_available() else "cpu"
 #     torch_dtype = torch.float16 if device == "cuda" else torch.float32
 #     pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
@@ -41,24 +41,23 @@ def generate_story(prompt):
 #     generated_image = image_result.images[0]
 #     return generated_image
 def text_to_speech(text, output_file="output.mp3"):
     tts = gTTS(text=text, lang="en")
     tts.save(output_file)
     return output_file
 def main():
-    # 居中显示标题，添加图标
     st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
     st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
-    # 图片上传
     uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded image", use_column_width=True)
-        with st.spinner("Generating image caption..."):
             caption = generate_caption(uploaded_file)
         st.write("**Image Caption:**", caption)
@@ -68,13 +67,13 @@ def main():
         st.write("**Story:**")
         st.write(story)
-        # 以下代码为生成故事插图（需要启用相关模型支持）
         # with st.spinner("Generating illustration..."):
         #     illustration = generate_illustration(story[:200])
-        # st.write("### Story Illustrations:")
         # st.image(illustration, caption="Story Illustrations", use_column_width=True)
-        with st.spinner("Converting text to voice..."):
             audio_file = text_to_speech(story)
         st.audio(audio_file, format="audio/mp3")

 from transformers import pipeline
 from gtts import gTTS
 st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
 def generate_caption(image_file):
     caption = caption_results[0]['generated_text']
     return caption
 def generate_story(prompt):
     story_generator = pipeline("text-generation", model="gpt2")
     result = story_generator(prompt, max_length=300, num_return_sequences=1)
     story = result[0]['generated_text']
     if len(story.split()) < 100:
         additional = story_generator(prompt, max_length=350, num_return_sequences=1)[0]['generated_text']
         story += " " + additional
     return story
 # ----------------------------
+# generate_illustration
 # ----------------------------
 @st.cache_resource
 # def load_image_generator():
 #     device = "cuda" if torch.cuda.is_available() else "cpu"
 #     torch_dtype = torch.float16 if device == "cuda" else torch.float32
 #     pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
 #     generated_image = image_result.images[0]
 #     return generated_image
 def text_to_speech(text, output_file="output.mp3"):
     tts = gTTS(text=text, lang="en")
     tts.save(output_file)
     return output_file
 def main():
     st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
     st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
     uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
     if uploaded_file is not None:
         image = Image.open(uploaded_file)
         st.image(image, caption="Uploaded image", use_column_width=True)
+        with st.spinner("Image caption being generated..."):
             caption = generate_caption(uploaded_file)
         st.write("**Image Caption:**", caption)
         st.write("**Story:**")
         st.write(story)
         # with st.spinner("Generating illustration..."):
         #     illustration = generate_illustration(story[:200])
+        # st.write("### Story Illustrations：")
         # st.image(illustration, caption="Story Illustrations", use_column_width=True)
+        with st.spinner("Converting to voice...."):
             audio_file = text_to_speech(story)
         st.audio(audio_file, format="audio/mp3")