|
import streamlit as st |
|
from PIL import Image |
|
from transformers import pipeline |
|
|
|
|
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def _load_caption_pipeline():
    """Build the BLIP image-captioning pipeline once and reuse it.

    Streamlit re-executes the script on every interaction, so the pipeline is
    kept in Streamlit's resource cache instead of being rebuilt on each call.
    """
    return pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")


def generate_caption(image_file):
    """Generate a caption for an image with a Hugging Face image-to-text pipeline.

    Args:
        image_file: The uploaded image, given as a file object, a file path,
            or an already opened ``PIL.Image.Image``.

    Returns:
        The generated caption text with surrounding whitespace removed.
    """
    # Accept an already decoded image so callers do not have to reopen the file.
    if isinstance(image_file, Image.Image):
        image = image_file
    else:
        image = Image.open(image_file)

    caption_generator = _load_caption_pipeline()
    caption_results = caption_generator(image)
    return caption_results[0]["generated_text"].strip()
|
|
|
|
|
|
|
|
|
@st.cache_resource(show_spinner=False)
def _load_story_pipeline():
    """Build the GPT-2 text-generation pipeline once and reuse it.

    Streamlit re-executes the script on every interaction, so the pipeline is
    kept in Streamlit's resource cache instead of being rebuilt on each call.
    """
    return pipeline("text-generation", model="gpt2")


def generate_story(caption):
    """Generate a children's story from an image caption.

    The story is extended in a bounded number of rounds until it reaches
    100 words. This is best effort: if the model keeps stopping early, the
    shorter story is returned after the last round.

    Args:
        caption: The image caption text.

    Returns:
        The generated story text, without the instruction prompt.
    """
    min_words = 100
    # Three rounds of 256 new tokens plus the prompt are intended to stay
    # below GPT-2's 1024-token context window for ordinary captions.
    max_rounds = 3
    tokens_per_round = 256

    story_generator = _load_story_pipeline()
    prompt = (
        f"Based on the following image caption: '{caption}', "
        "generate a complete fairy tale story for children with at least 100 words. "
    )

    story = ""
    for _ in range(max_rounds):
        result = story_generator(
            # Feed back what was written so far so a short story is continued
            # rather than replaced by an unrelated second draft.
            prompt + story,
            # Budget only the new text; max_length would also count the prompt.
            max_new_tokens=tokens_per_round,
            num_return_sequences=1,
            # Keep the instruction prompt out of the story shown and read aloud.
            return_full_text=False,
        )
        story += result[0]["generated_text"]
        if len(story.split()) >= min_words:
            break

    return story.strip()
|
|
|
|
|
|
|
|
|
def text_to_speech(text, output_file="output.mp3"):
    """Convert text to English speech and save it as an MP3 file.

    Args:
        text: The text to convert.
        output_file: Name of the audio file to write.

    Returns:
        The ``output_file`` path that the audio was saved to.
    """
    # Imported inside the function, so gTTS is only loaded when this is called.
    from gtts import gTTS

    gTTS(text=text, lang="en").save(output_file)
    return output_file
|
|
|
|
|
|
|
|
|
def main():
    """Render the Streamlit page and run the caption, story and speech steps.

    Nothing beyond the title, intro text and uploader is rendered until the
    user has uploaded an image.
    """
    st.title("儿童故事生成应用")
    st.write("上传一张图片,我们将根据图片生成有趣的故事,并转换成语音播放!")

    upload = st.file_uploader("选择一张图片", type=["png", "jpg", "jpeg"])
    if upload is None:
        return

    # Show the uploaded picture back to the user.
    st.image(Image.open(upload), caption="上传的图片", use_column_width=True)

    # Step 1: describe the picture.
    with st.spinner("正在生成图片描述..."):
        image_caption = generate_caption(upload)
    st.write("图片描述:", image_caption)

    # Step 2: turn the description into a story.
    with st.spinner("正在生成故事..."):
        story_text = generate_story(image_caption)
    st.write("生成的故事:")
    st.write(story_text)

    # Step 3: read the story aloud.
    with st.spinner("正在转换成语音..."):
        audio_path = text_to_speech(story_text)
    st.audio(audio_path, format="audio/mp3")


# Run the app only when this file is executed as the main script.
if __name__ == "__main__":
    main()