Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,10 @@ import openai
|
|
6 |
from langchain import LLMChain, PromptTemplate
|
7 |
from langchain import HuggingFaceHub
|
8 |
|
|
|
|
|
|
|
|
|
9 |
# Suppressing all warnings
|
10 |
import warnings
|
11 |
warnings.filterwarnings("ignore")
|
@@ -60,48 +64,85 @@ def txt2speech(text):
|
|
60 |
|
61 |
with open('audio_story.mp3', 'wb') as file:
|
62 |
file.write(response.content)
|
63 |
-
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
|
66 |
# Streamlit web app main function
|
67 |
def main():
|
68 |
-
st.
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
if __name__ == '__main__':
|
107 |
main()
|
|
|
6 |
from langchain import LLMChain, PromptTemplate
|
7 |
from langchain import HuggingFaceHub
|
8 |
|
9 |
+
from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
|
10 |
+
import torch
|
11 |
+
|
12 |
+
|
13 |
# Suppressing all warnings
|
14 |
import warnings
|
15 |
warnings.filterwarnings("ignore")
|
|
|
64 |
|
65 |
with open('audio_story.mp3', 'wb') as file:
|
66 |
file.write(response.content)
|
|
|
67 |
|
68 |
+
|
69 |
+
# text-to-image
|
70 |
+
def txt2img(text, style="realistic"):
    """Generate one image from a text prompt with Stable Diffusion 2.

    Args:
        text: Prompt describing the desired image.
        style: Visual style hint (e.g. "realistic", "cartoon", "watercolor")
            appended to the prompt. Pass an empty string to send ``text``
            unchanged.

    Returns:
        The first image produced by the pipeline call.
    """
    model_id = "stabilityai/stable-diffusion-2"

    # Fall back to CPU/float32 when no GPU is present; the previous
    # hard-coded "cuda" + float16 combination fails on CPU-only hosts.
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    dtype = torch.float16 if use_cuda else torch.float32

    # Use the Euler scheduler here instead of the pipeline's default one.
    scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")

    # NOTE(review): the pipeline is rebuilt on every call, which is slow for a
    # model of this size; consider caching it (e.g. Streamlit resource caching).
    pipe = StableDiffusionPipeline.from_pretrained(
        model_id, scheduler=scheduler, torch_dtype=dtype
    )
    pipe = pipe.to(device)

    # The style argument used to be ignored; fold it into the prompt so the
    # caller's selection actually influences the result.
    prompt = f"{text}, {style} style" if style else text

    return pipe(prompt=prompt, guidance_scale=7.5).images[0]
|
79 |
+
|
80 |
+
|
81 |
+
# Sidebar heading shown above the task expanders that main() creates.
# NOTE(review): this runs at module level, i.e. before main() calls
# st.set_page_config; Streamlit has documented set_page_config as needing to
# be the first Streamlit command — confirm for the Streamlit version in use.
st.sidebar.title("Choose the task")
|
82 |
|
83 |
# Streamlit web app main function
|
84 |
def main():
    """Build the Streamlit page: two sidebar tools plus a welcome message.

    The sidebar gets an "Audio Story" expander (image -> caption -> story ->
    speech) and an "Image Generator" expander (prompt -> Stable Diffusion
    image); the main area shows a short welcome text.
    """
    # Configure the page before any other Streamlit command issued by main().
    # NOTE(review): the emoji in the UI strings of this function appear
    # mis-encoded in this copy of the file; they are reproduced unchanged.
    st.set_page_config(page_title="π¨ Image-to-Audio Story π§", page_icon="πΌοΈ")

    _render_audio_story()
    _render_image_generator()

    st.title("Welcome to your Creative Canvas!")
    st.write("Use the tools in the sidebar to create audio stories and unique images.")


def _render_audio_story():
    """Render the image-to-audio-story tool inside its sidebar expander."""
    with st.sidebar.expander("Audio Story"):
        st.title("Turn the Image into Audio Story")

        # Allows users to upload an image file
        uploaded_file = st.file_uploader("# π· Upload an image...", type=["jpg", "jpeg", "png"])

        # LLM sampling parameters (top-k, top-p, temperature) are not exposed
        # in the UI; `llm` is expected to be defined outside this function.
        if uploaded_file is None:
            return

        # Reads and saves uploaded image file so img2txt can open it by path
        bytes_data = uploaded_file.read()
        with open("uploaded_image.jpg", "wb") as file:
            file.write(bytes_data)

        st.image(uploaded_file, caption='πΌοΈ Uploaded Image', use_column_width=True)

        # Initiates AI processing and story generation
        with st.spinner("## π€ AI is at Work! "):
            scenario = img2txt("uploaded_image.jpg")  # Extracts text from the image
            story = generate_story(scenario, llm)  # Generates a story from the caption
            txt2speech(story)  # Converts the story to audio (audio_story.mp3)

        st.markdown("---")
        st.markdown("## π Image Caption")
        st.write(scenario)

        st.markdown("---")
        st.markdown("## π Story")
        st.write(story)

        st.markdown("---")
        st.markdown("## π§ Audio Story")
        st.audio("audio_story.mp3")


def _render_image_generator():
    """Render the Stable Diffusion prompt form inside its sidebar expander."""
    with st.sidebar.expander("Image Generator"):
        st.title("Stable Diffusion Image Generation")
        st.write("This app lets you generate images using Stable Diffusion with the Euler scheduler.")

        prompt = st.text_input("Enter your prompt:")
        image_style = st.selectbox("Style Selection", ["realistic", "cartoon", "watercolor"])

        if not st.button("Generate Image"):
            return
        if not prompt:
            st.error("Please enter a prompt.")
            return

        with st.spinner("Generating image..."):
            # txt2img's first parameter is named `text`; the previous call used
            # `prompt=` as the keyword, which raises TypeError.
            image = txt2img(text=prompt, style=image_style)
        st.image(image)
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
|
147 |
if __name__ == '__main__':
|
148 |
main()
|