miltonc committed on
Commit
885aabb
·
1 Parent(s): 5cd11e3

first commit

Browse files
Files changed (2) hide show
  1. app.py +59 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from gtts import gTTS
4
+ import os
5
+ from PIL import Image
6
+
7
+ # Load models
8
@st.cache_resource
def load_models():
    """Build the two Hugging Face pipelines the app relies on.

    Streamlit re-executes the script on every user interaction, so the
    result is cached with ``st.cache_resource``; without it both models
    would be re-instantiated on each rerun.

    Returns:
        A ``(image_to_text, summarizer)`` tuple: an "image-to-text"
        pipeline using ``Salesforce/blip-image-captioning-base`` and a
        "summarization" pipeline using ``facebook/bart-large-cnn``.
    """
    image_to_text = pipeline(
        "image-to-text",
        model="Salesforce/blip-image-captioning-base",
    )
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    return image_to_text, summarizer
12
+
13
+ # Process image to text
14
def generate_caption(image, image_to_text):
    """Return the caption that the captioning pipeline produces for *image*.

    Args:
        image: The image to describe, passed straight through to
            ``image_to_text`` (``main`` supplies a PIL image).
        image_to_text: Callable that takes the image and returns a
            sequence of dicts carrying a ``"generated_text"`` key.

    Returns:
        The ``"generated_text"`` of the first result, or the string
        ``"No caption generated."`` when the pipeline returns nothing.
    """
    result = image_to_text(image)
    if not result:
        return "No caption generated."
    return result[0]["generated_text"]
17
+
18
+ # Summarize text
19
def summarize_text(text, summarizer):
    """Condense *text* with the given summarization pipeline.

    Args:
        text: The text to summarize.
        summarizer: Callable invoked as
            ``summarizer(text, max_length=30, min_length=10, do_sample=False)``
            that returns a sequence of dicts carrying a ``"summary_text"`` key.

    Returns:
        The ``"summary_text"`` of the first result, or the string
        ``"No summary generated."`` when *text* is empty/blank or the
        pipeline returns nothing.
    """
    # Nothing meaningful to summarize: skip the model call entirely.
    if not text or not text.strip():
        return "No summary generated."

    summary = summarizer(text, max_length=30, min_length=10, do_sample=False)
    if not summary:
        return "No summary generated."
    return summary[0]["summary_text"]
22
+
23
+ # Convert text to speech
24
def text_to_speech(text, filename="output.mp3"):
    """Render *text* as speech with gTTS and save it to *filename*.

    Args:
        text: The text to speak; handed unchanged to ``gTTS``.
        filename: Destination path for the saved audio. Defaults to
            ``"output.mp3"``.

    Returns:
        *filename*, unchanged, so the caller can pass it on to a player.
    """
    speech = gTTS(text)
    speech.save(filename)
    return filename
28
+
29
+ # Main Streamlit app
30
def main():
    """Run the Streamlit page: upload an image, caption it, summarize, speak.

    Flow: show the title, load the models, wait for an uploaded image,
    display it, then show its caption, a summary of that caption, and an
    audio player with the spoken summary.
    """
    import tempfile  # local import: only the speech step needs it

    st.title("AI-Powered Image Captioning, Summarization, and Speech")

    image_to_text, summarizer = load_models()

    uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "png", "jpeg"])
    if uploaded_file is None:
        # Nothing uploaded yet; Streamlit reruns the script once a file arrives.
        return

    # Convert the uploaded file to a PIL image and show it back to the user.
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    with st.spinner("Generating caption..."):
        caption = generate_caption(image, image_to_text)
    st.write("### Image Caption:")
    st.write(caption)

    with st.spinner("Summarizing caption..."):
        summary = summarize_text(caption, summarizer)
    st.write("### Summary:")
    st.write(summary)

    with st.spinner("Generating speech..."):
        # Write into a per-run scratch directory rather than a fixed
        # "output.mp3" in the working directory: concurrent sessions would
        # otherwise overwrite each other's audio file.
        with tempfile.TemporaryDirectory() as scratch_dir:
            audio_path = text_to_speech(summary, os.path.join(scratch_dir, "output.mp3"))
            with open(audio_path, "rb") as audio_file:
                audio_bytes = audio_file.read()
    # The bytes are already in memory, so the scratch file can be gone by now.
    st.audio(audio_bytes, format="audio/mp3")
57
+
58
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ Pillow
5
+ gtts