Spaces:

yuvaranianandhan24
/

story_telling

Sleeping

App Files Files Community

yuvaranianandhan24 committed on Apr 23, 2024

Commit

268f3a0

verified ·

1 Parent(s): 068d689

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -38

app.py CHANGED Viewed

@@ -6,6 +6,10 @@ import openai
 from langchain import LLMChain, PromptTemplate
 from langchain import HuggingFaceHub
 # Suppressing all warnings
 import warnings
 warnings.filterwarnings("ignore")
@@ -60,48 +64,85 @@ def txt2speech(text):
     with open('audio_story.mp3', 'wb') as file:
         file.write(response.content)
 # Streamlit web app main function
 def main():
-    st.set_page_config(page_title="🎨 Image-to-Audio Story 🎧", page_icon="🖼️")
-    st.title("Turn the Image into Audio Story")
-    # Allows users to upload an image file
-    uploaded_file = st.file_uploader("# 📷 Upload an image...", type=["jpg", "jpeg", "png"])
-    # Parameters for LLM model (in the sidebar)
-    st.sidebar.markdown("# LLM Inference Configuration Parameters")
-    top_k = st.sidebar.number_input("Top-K", min_value=1, max_value=100, value=5)
-    top_p = st.sidebar.number_input("Top-P", min_value=0.0, max_value=1.0, value=0.8)
-    temperature = st.sidebar.number_input("Temperature", min_value=0.1, max_value=2.0, value=1.5)
-    if uploaded_file is not None:
-        # Reads and saves uploaded image file
-        bytes_data = uploaded_file.read()
-        with open("uploaded_image.jpg", "wb") as file:
-            file.write(bytes_data)
-        st.image(uploaded_file, caption='🖼️ Uploaded Image', use_column_width=True)
-        # Initiates AI processing and story generation
-        with st.spinner("## 🤖 AI is at Work! "):
-            scenario = img2txt("uploaded_image.jpg")  # Extracts text from the image
-            story = generate_story(scenario, llm)  # Generates a story based on the image text, LLM params
-            txt2speech(story)  # Converts the story to audio
-            st.markdown("---")
-            st.markdown("## 📜 Image Caption")
-            st.write(scenario)
-            st.markdown("---")
-            st.markdown("## 📖 Story")
-            st.write(story)
-            st.markdown("---")
-            st.markdown("## 🎧 Audio Story")
-            st.audio("audio_story.mp3")
 if __name__ == '__main__':
     main()

 from langchain import LLMChain, PromptTemplate
 from langchain import HuggingFaceHub
+from diffusers import StableDiffusionPipeline, EulerDiscreteScheduler
+import torch
 # Suppressing all warnings
 import warnings
 warnings.filterwarnings("ignore")
     with open('audio_story.mp3', 'wb') as file:
         file.write(response.content)
+# Text-to-image generation with Stable Diffusion
+@st.cache_resource
+def _load_sd_pipeline(model_id="stabilityai/stable-diffusion-2"):
+    """Build the Stable Diffusion pipeline for ``model_id``.
+
+    Decorated with ``st.cache_resource`` so the scheduler and pipeline are
+    constructed once and reused, instead of being reloaded from pretrained
+    weights on every ``txt2img`` call.
+
+    Args:
+        model_id: Hugging Face Hub identifier of the diffusion model.
+
+    Returns:
+        A ``StableDiffusionPipeline`` using the Euler discrete scheduler,
+        moved to CUDA when available and to the CPU otherwise.
+    """
+    on_gpu = torch.cuda.is_available()
+    # Use the Euler scheduler here instead of the model's default one.
+    scheduler = EulerDiscreteScheduler.from_pretrained(model_id, subfolder="scheduler")
+    pipe = StableDiffusionPipeline.from_pretrained(
+        model_id,
+        scheduler=scheduler,
+        # Half precision is only requested on GPU; fall back to float32 on CPU.
+        torch_dtype=torch.float16 if on_gpu else torch.float32,
+    )
+    return pipe.to("cuda" if on_gpu else "cpu")
+def txt2img(text, style="realistic"):
+    """Generate one image from a text prompt.
+
+    Args:
+        text: The user's prompt describing the desired image.
+        style: Style keyword appended to the prompt (for example
+            "realistic", "cartoon" or "watercolor"). Pass an empty string
+            or ``None`` to send ``text`` unchanged.
+
+    Returns:
+        The first image produced by the pipeline call.
+    """
+    # The style argument was previously accepted but ignored; fold it into
+    # the prompt so the caller's style choice actually affects the output.
+    prompt = f"{text}, {style} style" if style else text
+    pipe = _load_sd_pipeline()
+    return pipe(prompt=prompt, guidance_scale=7.5).images[0]
+def _render_audio_story():
+    """Render the image-to-audio-story tool in the active Streamlit container.
+
+    Lets the user upload an image, saves it to ``uploaded_image.jpg``, then
+    calls ``img2txt``, ``generate_story`` and ``txt2speech`` in turn and shows
+    the caption, the story and the ``audio_story.mp3`` player.
+    """
+    st.title("Turn the Image into Audio Story")
+    # Allows users to upload an image file
+    uploaded_file = st.file_uploader("# 📷 Upload an image...", type=["jpg", "jpeg", "png"])
+    if uploaded_file is None:
+        return
+    # img2txt is called with a file path, so persist the upload to disk first.
+    # getvalue() returns the full buffer without moving the read position,
+    # leaving the upload intact for st.image below.
+    with open("uploaded_image.jpg", "wb") as file:
+        file.write(uploaded_file.getvalue())
+    st.image(uploaded_file, caption='🖼️ Uploaded Image', use_column_width=True)
+    # Only the slow model calls run under the spinner.
+    with st.spinner("## 🤖 AI is at Work! "):
+        scenario = img2txt("uploaded_image.jpg")  # Extracts text from the image
+        # NOTE(review): `llm` is not defined in this function; presumably a
+        # module-level name set earlier in the file - confirm.
+        story = generate_story(scenario, llm)  # Generates a story from the caption
+        txt2speech(story)  # Converts the story to audio
+    st.markdown("---")
+    st.markdown("## 📜 Image Caption")
+    st.write(scenario)
+    st.markdown("---")
+    st.markdown("## 📖 Story")
+    st.write(story)
+    st.markdown("---")
+    st.markdown("## 🎧 Audio Story")
+    st.audio("audio_story.mp3")
+def _render_image_generator():
+    """Render the Stable Diffusion prompt form in the active Streamlit container.
+
+    Shows a prompt box and a style selector. When "Generate Image" is pressed,
+    it calls ``txt2img`` and displays the result, or shows an error if the
+    prompt is empty.
+    """
+    st.title("Stable Diffusion Image Generation")
+    st.write("This app lets you generate images using Stable Diffusion with the Euler scheduler.")
+    prompt = st.text_input("Enter your prompt:")
+    image_style = st.selectbox("Style Selection", ["realistic", "cartoon", "watercolor"])
+    if not st.button("Generate Image"):
+        return
+    if not prompt:
+        st.error("Please enter a prompt.")
+        return
+    with st.spinner("Generating image..."):
+        # txt2img's first parameter is named `text`, not `prompt`; passing it
+        # positionally avoids the TypeError that `prompt=` would raise.
+        image = txt2img(prompt, style=image_style)
+    st.image(image)
 # Streamlit web app main function
 def main():
+    """Configure the page and lay out both tools as sidebar expanders."""
+    # set_page_config must be the first Streamlit command on the page, so it
+    # runs before any sidebar or title call rather than inside an expander.
+    st.set_page_config(page_title="🎨 Image-to-Audio Story 🎧", page_icon="🖼️")
+    st.sidebar.title("Choose the task")
+    with st.sidebar.expander("Audio Story"):
+        _render_audio_story()
+    with st.sidebar.expander("Image Generator"):
+        _render_image_generator()
+    st.title("Welcome to your Creative Canvas!")
+    st.write("Use the tools in the sidebar to create audio stories and unique images.")
 if __name__ == '__main__':
     main()