Spaces:

xieqilenb
/

blabla

Running

App Files Files Community

blabla / app.py

xieqilenb

Update app.py

59e8ed2 verified about 22 hours ago

raw

history blame

2.91 kB

	import streamlit as st
	from PIL import Image
	from transformers import pipeline
	from gtts import gTTS
	import torch

	# Page title and icon for the Streamlit app.
	st.set_page_config(
	    page_title="Your Image to Audio Story",
	    page_icon="🦜",
	)


	@st.cache_resource(show_spinner=False)
	def _load_caption_pipeline():
	    """Build the BLIP image-captioning pipeline once and reuse it afterwards."""
	    return pipeline(
	        "image-to-text",
	        model="Salesforce/blip-image-captioning-base",
	    )


	def generate_caption(image_file):
	    """Return a text caption describing the given image.

	    Args:
	        image_file: Anything ``PIL.Image.open`` accepts (a path or a binary
	            file-like object such as a Streamlit upload).

	    Returns:
	        The ``generated_text`` of the first captioning result.
	    """
	    image = Image.open(image_file)
	    # The pipeline is cached: constructing it on every call would reload the
	    # model weights each time an image is processed.
	    caption_generator = _load_caption_pipeline()
	    caption_results = caption_generator(image)
	    return caption_results[0]["generated_text"]

	@st.cache_resource(show_spinner=False)
	def _load_story_pipeline():
	    """Build the Qwen text-generation pipeline once and reuse it afterwards."""
	    return pipeline(
	        "text-generation",
	        model="Qwen/Qwen2.5-0.5B-Instruct",
	    )


	def generate_story(caption):
	    """Generate a short children's story inspired by an image caption.

	    Args:
	        caption: Text description of the image; it is embedded in the prompt.

	    Returns:
	        The generated story text with surrounding whitespace removed.
	    """
	    prompt = (
	        "You are a highly imaginative children's story writer celebrated for your creativity and captivating narratives. "
	        "Using the image details provided below, please craft an enchanting tale tailored for children aged 3 to 10. "
	        "Rather than simply reiterating the image details, enhance your story with imaginative characters, quirky adventures, "
	        "and delightful surprises that ignite wonder in every young heart. Let your narrative flow naturally and ensure that your story is complete, with a clear beginning, middle, and end. "
	        "Please ensure the total word count does not exceed 80 words, and do not leave the story incomplete.\n\n"
	        f"Image Details: {caption}\n\nStory:"
	    )

	    # The pipeline is cached: constructing it on every call would reload the
	    # model weights each time a story is requested.
	    story_generator = _load_story_pipeline()
	    result = story_generator(
	        prompt,
	        max_new_tokens=100,
	        num_return_sequences=1,
	        do_sample=True,
	        temperature=1.0,
	        # Ask for the continuation only. Cutting the prompt off by searching
	        # for "Story:" breaks when the caption itself contains that marker,
	        # because the split then lands inside the prompt.
	        return_full_text=False,
	    )
	    return result[0]["generated_text"].strip()

	def text_to_speech(text, output_file="output.mp3"):
	    """Convert ``text`` to English speech with gTTS and save it to ``output_file``.

	    Returns:
	        ``output_file``, unchanged, so the caller can hand it to a player.
	    """
	    gTTS(text=text, lang="en").save(output_file)
	    return output_file

	def main():
	    """Render the page: upload an image, caption it, write a story, play it back."""
	    st.markdown(
	        "<h1 style='text-align: center;'>Your Image to Audio Story 🦜</h1>",
	        unsafe_allow_html=True,
	    )
	    st.write(
	        "Upload an image below and we will generate an engaging story from the picture, then convert the story into an audio playback!"
	    )

	    uploaded_file = st.file_uploader("Select Image", type=["png", "jpg", "jpeg"])
	    if uploaded_file is None:
	        # Nothing to process until an image has been chosen.
	        return

	    preview = Image.open(uploaded_file)
	    st.image(preview, caption="Uploaded image", use_container_width=True)

	    # Step 1: image -> caption.
	    with st.spinner("Image caption being generated..."):
	        caption = generate_caption(uploaded_file)
	    st.write("Image Caption:", caption)

	    # Step 2: caption -> story.
	    with st.spinner("Generating story..."):
	        story = generate_story(caption)
	    st.write("Story:")
	    st.write(story)

	    # Step 3: story -> audio file, then embed a player for it.
	    with st.spinner("Converting to voice..."):
	        audio_path = text_to_speech(story)
	    st.audio(audio_path, format="audio/mp3")

	# Entry point: build the page only when this file is run as the main module.
	if __name__ == "__main__":
	    main()