IrisDeng committed on
Commit
c9b1bb5
·
verified ·
1 Parent(s): 26aff4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -34
app.py CHANGED
@@ -2,58 +2,35 @@ import streamlit as st
2
  from PIL import Image
3
  from transformers import pipeline
4
  from gtts import gTTS
5
- import torch
6
 
7
  st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
8
 
 
 
 
9
 
10
  def extract_image_caption(image_data):
11
- """
12
- 利用预训练模型从图像中提取描述性文字。
13
- """
14
  img_obj = Image.open(image_data)
15
- caption_pipeline = pipeline(
16
- "image-to-text",
17
- model="Salesforce/blip-image-captioning-base",
18
- )
19
  caption_results = caption_pipeline(img_obj)
20
- caption_text = caption_results[0]['generated_text']
21
- return caption_text
22
-
23
 
24
  def compose_story_from_caption(caption_detail):
25
- """
26
- 根据图像描述创作一篇充满创意的儿童故事。
27
- """
28
- story_pipeline = pipeline(
29
- "text-generation",
30
- model="Qwen/Qwen2-1.5B",
31
- )
32
  prompt_text = (
33
- "You are a talented and imaginative storyteller for children aged 3 to 10. "
34
- "Using the details derived from the image below, craft a captivating tale that goes beyond merely describing the scene. "
35
- "Let your creativity shine by introducing engaging characters, adventurous journeys, and delightful surprises. "
36
- "Your story should be vivid, original, and between 100 and 300 words in length.\n\n"
37
- f"Image Details: {caption_detail}\n\nStory:"
38
  )
39
  story_results = story_pipeline(prompt_text, num_return_sequences=1)
40
  story_text = story_results[0]['generated_text']
41
- if "Story:" in story_text:
42
- story = story_text.split("Story:", 1)[1].strip()
43
- else:
44
- story = story_text.strip()
45
- return story
46
-
47
 
48
  def convert_text_to_audio(text_content, audio_path="output.mp3"):
49
- """
50
- 将文本转换为音频文件。
51
- """
52
  tts_engine = gTTS(text=text_content, lang="en")
53
  tts_engine.save(audio_path)
54
  return audio_path
55
 
56
-
57
  def run_app():
58
  st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
59
  st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
@@ -77,6 +54,5 @@ def run_app():
77
  audio_file = convert_text_to_audio(story_text)
78
  st.audio(audio_file, format="audio/mp3")
79
 
80
-
81
  if __name__ == "__main__":
82
  run_app()
 
2
  from PIL import Image
3
  from transformers import pipeline
4
  from gtts import gTTS
 
5
 
6
  st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
7
 
8
+ # Load models once
9
+ caption_pipeline = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
10
+ story_pipeline = pipeline("text-generation", model="Qwen/Qwen2-1.5B")
11
 
12
  def extract_image_caption(image_data):
 
 
 
13
  img_obj = Image.open(image_data)
 
 
 
 
14
  caption_results = caption_pipeline(img_obj)
15
+ return caption_results[0]['generated_text']
 
 
16
 
17
  def compose_story_from_caption(caption_detail):
 
 
 
 
 
 
 
18
  prompt_text = (
19
+ "You are a talented and imaginative storyteller for children aged 3 to 10. "
20
+ "Using the details derived from the image below, craft a captivating tale that goes beyond merely describing the scene. "
21
+ "Let your creativity shine by introducing engaging characters, adventurous journeys, and delightful surprises. "
22
+ "Your story should be vivid, original, and between 100 and 300 words in length.\n\n"
23
+ f"Image Details: {caption_detail}\n\nStory:"
24
  )
25
  story_results = story_pipeline(prompt_text, num_return_sequences=1)
26
  story_text = story_results[0]['generated_text']
27
+ return story_text.split("Story:", 1)[1].strip() if "Story:" in story_text else story_text.strip()
 
 
 
 
 
28
 
29
  def convert_text_to_audio(text_content, audio_path="output.mp3"):
 
 
 
30
  tts_engine = gTTS(text=text_content, lang="en")
31
  tts_engine.save(audio_path)
32
  return audio_path
33
 
 
34
  def run_app():
35
  st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
36
  st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
 
54
  audio_file = convert_text_to_audio(story_text)
55
  st.audio(audio_file, format="audio/mp3")
56
 
 
57
  if __name__ == "__main__":
58
  run_app()