IrisDeng committed on
Commit
96644c9
·
verified ·
1 Parent(s): c9b1bb5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -42
app.py CHANGED
@@ -2,57 +2,72 @@ import streamlit as st
2
  from PIL import Image
3
  from transformers import pipeline
4
  from gtts import gTTS
 
5
 
6
  st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
7
 
8
- # Load models once
9
- caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
- story_pipeline = pipeline("text-generation", model="Qwen/Qwen2-1.5B")
11
-
12
  def extract_image_caption(image_data):
13
- img_obj = Image.open(image_data)
14
- caption_results = caption_pipeline(img_obj)
15
- return caption_results[0]['generated_text']
 
 
 
 
 
 
 
 
16
 
17
  def compose_story_from_caption(caption_detail):
18
- prompt_text = (
19
- "You are a talented and imaginative storyteller for children aged 3 to 10. "
20
- "Using the details derived from the image below, craft a captivating tale that goes beyond merely describing the scene. "
21
- "Let your creativity shine by introducing engaging characters, adventurous journeys, and delightful surprises. "
22
- "Your story should be vivid, original, and between 100 and 300 words in length.\n\n"
23
- f"Image Details: {caption_detail}\n\nStory:"
24
- )
25
- story_results = story_pipeline(prompt_text, num_return_sequences=1)
26
- story_text = story_results[0]['generated_text']
27
- return story_text.split("Story:", 1)[1].strip() if "Story:" in story_text else story_text.strip()
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  def convert_text_to_audio(text_content, audio_path="output.mp3"):
30
- tts_engine = gTTS(text=text_content, lang="en")
31
- tts_engine.save(audio_path)
32
- return audio_path
 
 
 
33
 
34
  def run_app():
35
- st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
36
- st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
 
 
 
 
 
37
 
38
- uploaded_image = st.file_uploader("Select an Image", type=["png", "jpg", "jpeg"])
 
 
39
 
40
- if uploaded_image is not None:
41
- image_display = Image.open(uploaded_image)
42
- st.image(image_display, caption="Uploaded Image", use_container_width=True)
43
-
44
- with st.spinner("Generating caption for the image..."):
45
- caption_text = extract_image_caption(uploaded_image)
46
- st.write("**Generated Caption:**", caption_text)
47
-
48
- with st.spinner("Composing story..."):
49
- story_text = compose_story_from_caption(caption_text)
50
- st.write("**Story:**")
51
- st.write(story_text)
52
-
53
- with st.spinner("Converting text to audio..."):
54
- audio_file = convert_text_to_audio(story_text)
55
- st.audio(audio_file, format="audio/mp3")
56
-
57
- if __name__ == "__main__":
58
- run_app()
 
2
  from PIL import Image
3
  from transformers import pipeline
4
  from gtts import gTTS
5
+ import torch
6
 
7
  st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
8
 
 
 
 
 
9
  def extract_image_caption(image_data):
10
+ """
11
+ 利用预训练模型从图像中提取描述性文字。
12
+ """
13
+ img_obj = Image.open(image_data)
14
+ caption_pipeline = pipeline(
15
+ "image-to-text",
16
+ model="Salesforce/blip-image-captioning-base",
17
+ )
18
+ caption_results = caption_pipeline(img_obj)
19
+ caption_text = caption_results[0]['generated_text']
20
+ return caption_text
21
 
22
  def compose_story_from_caption(caption_detail):
23
+ """
24
+ 根据图像描述创作一篇充满创意的儿童故事。
25
+ """
26
+ story_pipeline = pipeline(
27
+ "text-generation",
28
+ model="Qwen/Qwen2-1.5B",
29
+ )
30
+ prompt_text = (
31
+ "You are a talented and imaginative storyteller for children aged 3 to 10. "
32
+ "Using the details derived from the image below, craft a captivating tale that goes beyond merely describing the scene. "
33
+ "Let your creativity shine by introducing engaging characters, adventurous journeys, and delightful surprises. "
34
+ "Your story should be vivid, original, and between 100 and 300 words in length.\n\n"
35
+ f"Image Details: {caption_detail}\n\nStory:"
36
+ )
37
+ story_results = story_pipeline(prompt_text, num_return_sequences=1)
38
+ story_text = story_results[0]['generated_text']
39
+ if "Story:" in story_text:
40
+ story = story_text.split("Story:", 1)[1].strip()
41
+ else:
42
+ story = story_text.strip()
43
+ return story
44
 
45
  def convert_text_to_audio(text_content, audio_path="output.mp3"):
46
+ """
47
+ 将文本转换为音频文件。
48
+ """
49
+ tts_engine = gTTS(text=text_content, lang="en")
50
+ tts_engine.save(audio_path)
51
+ return audio_path
52
 
53
  def run_app():
54
+ st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
55
+ st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
56
+ uploaded_image = st.file_uploader("Select an Image", type=["png", "jpg", "jpeg"])
57
+
58
+ if uploaded_image is not None:
59
+ image_display = Image.open(uploaded_image)
60
+ st.image(image_display, caption="Uploaded Image", use_container_width=True)
61
 
62
+ with st.spinner("Generating caption for the image..."):
63
+ caption_text = extract_image_caption(uploaded_image)
64
+ st.write("**Generated Caption:**", caption_text)
65
 
66
+ with st.spinner("Composing story..."):
67
+ story_text = compose_story_from_caption(caption_text)
68
+ st.write("**Story:**")
69
+ st.write(story_text)
70
+
71
+ with st.spinner("Converting text to audio..."):
72
+ audio_file = convert_text_to_audio(story_text)
73
+ st.audio(audio_file, format="audio/mp3")