xieqilenb committed on
Commit
7f4a56e
·
verified ·
1 Parent(s): fd902a3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -61
app.py CHANGED
@@ -1,70 +1,74 @@
1
  import streamlit as st
2
- from PIL import Image
3
  from transformers import pipeline
 
4
  from gtts import gTTS
 
5
 
6
- st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
7
-
8
- def generate_caption(image_file):
9
- image = Image.open(image_file)
10
- caption_generator = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
11
- caption_results = caption_generator(image)
12
- caption = caption_results[0]['generated_text']
13
- return caption
14
 
15
- def generate_story(caption):
16
- story_generator = pipeline("text-generation", model="Qwen/Qwen2-1.5B")
17
- messages = "Please based on following image caption: "+caption+", generate a complete fairy tale story for children with at least 100 words and max 300 words"
18
- result = story_generator(messages, max_length=300, num_return_sequences=1)
19
- story = result[0]['generated_text']
20
- return story
21
 
22
- @st.cache_resource
23
- # def load_image_generator():
24
- # device = "cuda" if torch.cuda.is_available() else "cpu"
25
- # torch_dtype = torch.float16 if device == "cuda" else torch.float32
26
- # pipe = DiffusionPipeline.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
27
- # pipe = pipe.to(device)
28
- # return pipe
29
-
30
- # def generate_illustration(prompt):
31
- # pipe = load_image_generator()
32
- # image_result = pipe(prompt)
33
- # generated_image = image_result.images[0]
34
- # return generated_image
35
-
36
- def text_to_speech(text, output_file="output.mp3"):
37
  tts = gTTS(text=text, lang="en")
38
- tts.save(output_file)
39
- return output_file
 
 
 
 
 
 
40
 
41
- def main():
42
- st.markdown("<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>", unsafe_allow_html=True)
43
- st.write("Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!")
44
-
45
- uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
46
-
47
- if uploaded_file is not None:
48
- image = Image.open(uploaded_file)
49
- st.image(image, caption="Uploaded image", use_container_width=True)
50
-
51
- with st.spinner("Image caption being generated..."):
52
- caption = generate_caption(uploaded_file)
53
- st.write("**Image Caption:**", caption)
54
-
55
- with st.spinner("Generating story..."):
56
- story = generate_story(caption)
57
- st.write("**Story:**")
58
- st.write(story)
59
-
60
- # with st.spinner("Generating illustration..."):
61
- # illustration = generate_illustration(story[:200])
62
- # st.write("### Story Illustrations:")
63
- # st.image(illustration, caption="Story Illustrations", use_container_width=True)
64
-
65
- with st.spinner("Converting to voice...."):
66
- audio_file = text_to_speech(story)
67
- st.audio(audio_file, format="audio/mp3")
68
 
69
- if __name__ == "__main__":
70
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
 
2
  from transformers import pipeline
3
+ from PIL import Image
4
  from gtts import gTTS
5
+ from io import BytesIO
6
 
7
@st.cache_resource
def get_image_captioner():
    """Return the image-captioning pipeline, building it only once.

    The pipeline is created with the "image-to-text" task and the
    ``Salesforce/blip-image-captioning-base`` checkpoint.  Streamlit re-runs
    the whole script on every widget interaction, so the result is memoized
    with ``st.cache_resource`` to avoid reloading the model on each click.

    Returns:
        The object returned by ``transformers.pipeline`` for this task/model.
    """
    return pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
 
 
 
 
 
 
9
 
10
@st.cache_resource
def get_story_generator():
    """Return the text-generation pipeline, building it only once.

    The pipeline is created with the "text-generation" task and the
    ``Qwen/Qwen2-1.5B`` checkpoint.  Streamlit re-runs the whole script on
    every widget interaction, so the result is memoized with
    ``st.cache_resource`` to avoid reloading the model on each click.

    Returns:
        The object returned by ``transformers.pipeline`` for this task/model.
    """
    return pipeline("text-generation", model="Qwen/Qwen2-1.5B")
 
 
 
 
12
 
13
def text_to_speech(text):
    """Synthesize English speech for *text* into an in-memory buffer.

    Args:
        text: The text to be spoken; passed straight to ``gTTS``.

    Returns:
        A ``BytesIO`` holding the audio written by gTTS, rewound to
        position 0 so it can be read from the start.
    """
    speech = gTTS(text=text, lang="en")
    buffer = BytesIO()
    speech.write_to_fp(buffer)
    # Rewind: write_to_fp leaves the cursor at the end of the data.
    buffer.seek(0)
    return buffer
19
+
20
# ---------------------------------------------------------------------------
# Streamlit page: upload an image -> caption -> children's story -> audio.
# ---------------------------------------------------------------------------
st.set_page_config(page_title="Image to Audio Story", page_icon="🦜")
st.title("Children's Storytelling App")
st.write("Upload an image and let the magic create a story, then convert it to audio!")

uploaded_file = st.file_uploader("Select an image...", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    try:
        # Normalize to RGB before display and captioning.
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_container_width=True)

        if st.button("Generate Story"):
            with st.spinner("Generating content..."):
                # Step 1: caption the uploaded image.
                captioner = get_image_captioner()
                caption_result = captioner(image)
                caption = caption_result[0]["generated_text"]
                st.subheader("Image Caption")
                st.write(caption)

                # Step 2: build the story prompt from the caption.
                prompt = (
                    "You are a creative children's story writer. Based on the following image details, "
                    "please write an imaginative story for children aged 3-10. Do not simply rephrase the image details; "
                    "instead, expand creatively by adding fun characters, adventures, and unexpected twists. "
                    "The story must be at least 100 words long.\n\n"
                    f"Image Details: {caption}\n\nStory:"
                )

                # Step 3: generate the story.  The sampling settings are kept
                # in one place so every attempt uses the same configuration.
                story_generator = get_story_generator()
                generation_kwargs = {
                    "max_length": 300,
                    "min_length": 100,
                    "num_return_sequences": 1,
                    "do_sample": True,
                    "top_p": 0.95,
                    "top_k": 50,
                }
                min_story_words = 100
                # Bounded retries: an unbounded loop could hang the app if the
                # model keeps producing short output.
                max_attempts = 3

                story = ""
                for _ in range(max_attempts):
                    story_result = story_generator(prompt, **generation_kwargs)
                    generated = story_result[0]["generated_text"]

                    # The generated text normally echoes the prompt; remove it
                    # *before* counting words so the prompt's own words do not
                    # satisfy the minimum-length check.
                    if generated.startswith(prompt):
                        candidate = generated[len(prompt):]
                    elif "Story:" in generated:
                        candidate = generated.split("Story:", 1)[-1]
                    else:
                        candidate = generated
                    candidate = candidate.strip()

                    # Keep the longest attempt seen so far as the fallback.
                    if len(candidate.split()) > len(story.split()):
                        story = candidate
                    if len(story.split()) >= min_story_words:
                        break

                st.subheader("Generated Story")
                st.write(story)

                # Step 4: read the story aloud.
                audio_bytes = text_to_speech(story)
                st.subheader("Listen to the Story")
                st.audio(audio_bytes, format="audio/mp3")
    except Exception as e:
        # Top-level UI boundary: surface any failure to the user instead of
        # showing a raw traceback.
        st.error(f"An error occurred: {e}")