xieqilenb committed on
Commit
ab58353
·
verified ·
1 Parent(s): 50d76c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -40
app.py CHANGED
@@ -6,56 +6,46 @@ import torch
6
 
7
  st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
8
 
9
- # Check whether a GPU is available: use the GPU (device=0) if so, otherwise the CPU (device=-1)
10
- device_id = 0 if torch.cuda.is_available() else -1
11
 
12
  def generate_caption(image_file):
13
  image = Image.open(image_file)
14
- # 使用 GPU 进行图像描述生成,如果可用
15
  caption_generator = pipeline(
16
  "image-to-text",
17
  model="Salesforce/blip-image-captioning-base",
18
- device=device_id
19
  )
20
  caption_results = caption_generator(image)
21
  caption = caption_results[0]['generated_text']
22
  return caption
23
 
24
  def generate_story(caption):
25
- # 使用 GPU 进行文本生成操作
26
  story_generator = pipeline(
27
  "text-generation",
28
  model="Qwen/Qwen2-1.5B",
29
- device=device_id
30
  )
31
- messages = (
32
- "You are a creative children's story writer. Based on the following image details, "
33
- "please write an imaginative story for children aged 3-10. Do not simply rephrase the image details; "
34
- "instead, expand creatively by adding fun characters, adventures, and unexpected twists. "
35
- "The story must be at least 100 words long.\n\n"
36
- f"Image Details: {caption}\n\nStory:"
37
- )
38
- result = story_generator(messages, max_length=300, num_return_sequences=1)
39
- story = result[0]['generated_text']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  return story
41
- 000000000
42
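- # The following section is example code for generating illustrations and is commented out. To use it with a GPU, uncomment it and make sure the diffusers dependencies are installed.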
43
- # @st.cache_resource
44
- # def load_image_generator():
45
- # from diffusers import DiffusionPipeline
46
- # device = "cuda" if torch.cuda.is_available() else "cpu"
47
- # torch_dtype = torch.float16 if device == "cuda" else torch.float32
48
- # pipe = DiffusionPipeline.from_pretrained(
49
- # "stable-diffusion-v1-5/stable-diffusion-v1-5", torch_dtype=torch_dtype
50
- # )
51
- # pipe = pipe.to(device)
52
- # return pipe
53
- #
54
- # def generate_illustration(prompt):
55
- # pipe = load_image_generator()
56
- # image_result = pipe(prompt)
57
- # generated_image = image_result.images[0]
58
- # return generated_image
59
 
60
  def text_to_speech(text, output_file="output.mp3"):
61
  tts = gTTS(text=text, lang="en")
@@ -80,13 +70,6 @@ def main():
80
  story = generate_story(caption)
81
  st.write("**Story:**")
82
  st.write(story)
83
-
84
- # To generate an illustration, uncomment the code below.
85
- # with st.spinner("Generating illustration..."):
86
- # illustration = generate_illustration(story[:200])
87
- # st.write("### Story Illustrations:")
88
- # st.image(illustration, caption="Story Illustrations", use_container_width=True)
89
-
90
  with st.spinner("Converting to voice..."):
91
  audio_file = text_to_speech(story)
92
  st.audio(audio_file, format="audio/mp3")
 
6
 
7
  st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
8
 
 
 
9
 
10
  def generate_caption(image_file):
11
  image = Image.open(image_file)
 
12
  caption_generator = pipeline(
13
  "image-to-text",
14
  model="Salesforce/blip-image-captioning-base",
 
15
  )
16
  caption_results = caption_generator(image)
17
  caption = caption_results[0]['generated_text']
18
  return caption
19
 
20
  def generate_story(caption):
 
21
  story_generator = pipeline(
22
  "text-generation",
23
  model="Qwen/Qwen2-1.5B",
 
24
  )
25
+ prompt = (
26
+ "You are a talented children's story writer renowned for your creativity and captivating narratives. "
27
+ "Using the image details provided below, please compose an enchanting tale tailored for children aged 3 to 10. "
28
+ "Rather than simply rephrasing the image details, enrich your story with imaginative characters, quirky adventures, "
29
+ "and delightful surprises. Let your narrative flow naturally and spark wonder in your young audience. "
30
+ "Please ensure that your story is engaging, coherent, and falls between 100 and 300 words in length.\n\n"
31
+ f"Image Details: {caption}\n\nStory:"
32
+ )
33
+
34
+ result = story_generator(prompt, max_length=300, num_return_sequences=1)
35
+ full_text = result[0]['generated_text']
36
+
37
+ if "Story:" in full_text:
38
+ story = full_text.split("Story:", 1)[1].strip()
39
+ else:
40
+ story = full_text.strip()
41
+
42
+ words = story.split()
43
+ if len(words) > 300:
44
+ story = " ".join(words[:300])
45
+ elif len(words) < 100:
46
+ story += "\n\n(Note: The generated story is shorter than the desired 100 words.)"
47
+
48
  return story
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
  def text_to_speech(text, output_file="output.mp3"):
51
  tts = gTTS(text=text, lang="en")
 
70
  story = generate_story(caption)
71
  st.write("**Story:**")
72
  st.write(story)
 
 
 
 
 
 
 
73
  with st.spinner("Converting to voice..."):
74
  audio_file = text_to_speech(story)
75
  st.audio(audio_file, format="audio/mp3")