yuvaranianandhan24 committed on
Commit
268f3a0
·
verified ·
1 Parent(s): 068d689

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -38
app.py CHANGED
@@ -6,6 +6,10 @@ import openai
6
  from langchain import LLMChain, PromptTemplate
7
  from langchain import HuggingFaceHub
8
 
 
 
 
 
9
  # Suppressing all warnings
10
  import warnings
11
  warnings.filterwarnings("ignore")
@@ -60,48 +64,85 @@ def txt2speech(text):
60
 
61
  with open('audio_story.mp3', 'wb') as file:
62
  file.write(response.content)
63
-
64
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  # Streamlit web app main function
67
  def main():
68
- st.set_page_config(page_title="🎨 Image-to-Audio Story 🎧", page_icon="🖼️")
69
- st.title("Turn the Image into Audio Story")
70
-
71
- # Allows users to upload an image file
72
- uploaded_file = st.file_uploader("# 📷 Upload an image...", type=["jpg", "jpeg", "png"])
73
-
74
- # Parameters for LLM model (in the sidebar)
75
- st.sidebar.markdown("# LLM Inference Configuration Parameters")
76
- top_k = st.sidebar.number_input("Top-K", min_value=1, max_value=100, value=5)
77
- top_p = st.sidebar.number_input("Top-P", min_value=0.0, max_value=1.0, value=0.8)
78
- temperature = st.sidebar.number_input("Temperature", min_value=0.1, max_value=2.0, value=1.5)
79
-
80
- if uploaded_file is not None:
81
- # Reads and saves uploaded image file
82
- bytes_data = uploaded_file.read()
83
- with open("uploaded_image.jpg", "wb") as file:
84
- file.write(bytes_data)
85
-
86
- st.image(uploaded_file, caption='🖼️ Uploaded Image', use_column_width=True)
87
-
88
- # Initiates AI processing and story generation
89
- with st.spinner("## 🤖 AI is at Work! "):
90
- scenario = img2txt("uploaded_image.jpg") # Extracts text from the image
91
- story = generate_story(scenario, llm) # Generates a story based on the image text, LLM params
92
- txt2speech(story) # Converts the story to audio
93
-
94
- st.markdown("---")
95
- st.markdown("## 📜 Image Caption")
96
- st.write(scenario)
97
-
98
- st.markdown("---")
99
- st.markdown("## 📖 Story")
100
- st.write(story)
101
-
102
- st.markdown("---")
103
- st.markdown("## 🎧 Audio Story")
104
- st.audio("audio_story.mp3")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
 
106
  if __name__ == '__main__':
107
  main()
 
6
  from langchain import LLMChain, PromptTemplate
7
  from langchain import HuggingFaceHub
8
 
9
+ from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
10
+ import torch
11
+
12
+
13
  # Suppressing all warnings
14
  import warnings
15
  warnings.filterwarnings("ignore")
 
64
 
65
  with open('audio_story.mp3', 'wb') as file:
66
  file.write(response.content)
 
67
 
68
+
69
# Text-to-image generation
def txt2img(text, style="realistic"):
    """Generate one image from a text prompt with Stable Diffusion 2.

    Args:
        text: Prompt describing the image to generate.
        style: Style keyword folded into the prompt (for example
            "realistic", "cartoon" or "watercolor"). A falsy value leaves
            the prompt unchanged.

    Returns:
        The first image returned by the pipeline (``.images[0]``).
    """
    model_id = "stabilityai/stable-diffusion-2"

    # The style argument used to be accepted and then ignored, so the
    # caller's selection never influenced the result. Append it to the prompt.
    prompt = f"{text}, {style} style" if style else text

    # Only request CUDA and half precision when a GPU is actually present;
    # otherwise run on CPU in float32 instead of failing on `.to("cuda")`.
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    dtype = torch.float16 if use_cuda else torch.float32

    # Use the Euler scheduler shipped with the model instead of the default.
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id, scheduler=scheduler, torch_dtype=dtype
    )
    pipe = pipe.to(device)

    # NOTE(review): the pipeline is rebuilt on every call; consider caching
    # it (e.g. with Streamlit's resource cache) if the installed version
    # supports that.
    return pipe(prompt=prompt, guidance_scale=7.5).images[0]
79
+
80
+
81
# Streamlit web app main function
def main():
    """Render the app: an audio-story tool and an image generator.

    Both tools live in sidebar expanders. The "Audio Story" expander takes
    an uploaded image, captions it with ``img2txt``, turns the caption into
    a story with ``generate_story`` and writes the narration with
    ``txt2speech``. The "Image Generator" expander sends a text prompt and a
    style choice to ``txt2img`` and shows the resulting image.
    """
    # Page configuration is issued before every other Streamlit call made
    # here; older Streamlit releases reject it otherwise.
    # NOTE(review): confirm no Streamlit call runs earlier in the file.
    st.set_page_config(page_title="🎨 Image-to-Audio Story 🎧", page_icon="🖼️")
    st.sidebar.title("Choose the task")

    with st.sidebar.expander("Audio Story"):
        st.title("Turn the Image into Audio Story")

        # Allows users to upload an image file
        uploaded_file = st.file_uploader("# 📷 Upload an image...", type=["jpg", "jpeg", "png"])

        if uploaded_file is not None:
            # Persist the upload so img2txt can read it from disk
            bytes_data = uploaded_file.read()
            with open("uploaded_image.jpg", "wb") as file:
                file.write(bytes_data)

            st.image(uploaded_file, caption='🖼️ Uploaded Image', use_column_width=True)

            # Caption -> story -> speech, with a spinner while it runs
            with st.spinner("## 🤖 AI is at Work! "):
                scenario = img2txt("uploaded_image.jpg")  # Extracts text from the image
                story = generate_story(scenario, llm)  # `llm` is defined elsewhere in the file
                txt2speech(story)  # Writes audio_story.mp3

            st.markdown("---")
            st.markdown("## 📜 Image Caption")
            st.write(scenario)

            st.markdown("---")
            st.markdown("## 📖 Story")
            st.write(story)

            st.markdown("---")
            st.markdown("## 🎧 Audio Story")
            st.audio("audio_story.mp3")

    with st.sidebar.expander("Image Generator"):
        st.title("Stable Diffusion Image Generation")
        st.write("This app lets you generate images using Stable Diffusion with the Euler scheduler.")

        prompt = st.text_input("Enter your prompt:")
        image_style = st.selectbox("Style Selection", ["realistic", "cartoon", "watercolor"])

        if st.button("Generate Image"):
            if prompt:
                with st.spinner("Generating image..."):
                    # txt2img's first parameter is named `text`; passing it
                    # as `prompt=` raised a TypeError.
                    image = txt2img(prompt, style=image_style)
                st.image(image)
            else:
                st.error("Please enter a prompt.")

    st.title("Welcome to your Creative Canvas!")
    st.write("Use the tools in the sidebar to create audio stories and unique images.")


if __name__ == '__main__':
    main()