Spaces:

awacke1
/

GPT-4o-omni-text-audio-image-video

Running

App Files Community

awacke1 committed on May 21, 2024

Commit

2899a5e

verified ·

1 Parent(s): 9b70442

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -11

app.py CHANGED Viewed

@@ -29,14 +29,26 @@ def create_file(filename, prompt, response, should_save=True):
 def process_text(text_input):
     if text_input:
         st.session_state.messages.append({"role": "user", "content": text_input})
-        st.chat_message("user", text_input)
         completion = client.chat.completions.create(model=MODEL, messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages], stream=False)
         return_text = completion.choices[0].message.content
-        st.chat_message("assistant", return_text)
         filename = generate_filename(text_input, "md")
         create_file(filename, text_input, return_text)
         st.session_state.messages.append({"role": "assistant", "content": return_text})
 def save_image(image_input, filename):
     with open(filename, "wb") as f:
         f.write(image_input.getvalue())
@@ -44,12 +56,14 @@ def save_image(image_input, filename):
 def process_image(image_input):
     if image_input:
-        st.chat_message("user", 'Processing image:  ' + image_input.name)
         base64_image = base64.b64encode(image_input.read()).decode("utf-8")
         st.session_state.messages.append({"role": "user", "content": [{"type": "text", "text": "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}]})
         response = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, temperature=0.0)
         image_response = response.choices[0].message.content
-        st.chat_message("assistant", image_response)
         filename_md, filename_img = generate_filename(image_input.name + '- ' + image_response, "md"), image_input.name
         create_file(filename_md, image_response, '', True)
         with open(filename_md, "w", encoding="utf-8") as f:
@@ -64,7 +78,8 @@ def process_audio(audio_input):
         transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_input)
         response = client.chat.completions.create(model=MODEL, messages=[{"role": "system", "content":"You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."}, {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription.text}"}]}], temperature=0)
         audio_response = response.choices[0].message.content
-        st.chat_message("assistant", audio_response)
         filename = generate_filename(transcription.text, "md")
         create_file(filename, transcription.text, audio_response, should_save=True)
         st.session_state.messages.append({"role": "assistant", "content": audio_response})
@@ -77,7 +92,8 @@ def process_audio_and_video(video_input):
         st.session_state.messages.append({"role": "user", "content": ["These are the frames from the video.", *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames), {"type": "text", "text": f"The audio transcription is: {transcript}"}]})
         response = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, temperature=0)
         video_response = response.choices[0].message.content
-        st.chat_message("assistant", video_response)
         filename = generate_filename(transcript, "md")
         create_file(filename, transcript, video_response, should_save=True)
         st.session_state.messages.append({"role": "assistant", "content": video_response})
@@ -88,7 +104,8 @@ def process_audio_for_video(video_input):
         transcription = client.audio.transcriptions.create(model="whisper-1", file=video_input)
         response = client.chat.completions.create(model=MODEL, messages=[{"role": "system", "content":"You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."}, {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}]}], temperature=0)
         video_response = response.choices[0].message.content
-        st.chat_message("assistant", video_response)
         filename = generate_filename(transcription, "md")
         create_file(filename, transcription, video_response, should_save=True)
         st.session_state.messages.append({"role": "assistant", "content": video_response})
@@ -156,10 +173,11 @@ def main():
     if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
         st.session_state.messages.append({"role": "user", "content": prompt})
-        st.chat_message("user", prompt)
         with st.chat_message("assistant"):
             completion = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, stream=True)
-            response = process_text(text_input=prompt)
         st.session_state.messages.append({"role": "assistant", "content": response})
     filename = save_and_play_audio(audio_recorder)
@@ -167,10 +185,11 @@ def main():
         transcript = transcribe_canary(filename)
         result = search_arxiv(transcript)
         st.session_state.messages.append({"role": "user", "content": transcript})
-        st.chat_message("user", transcript)
         with st.chat_message("assistant"):
             completion = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, stream=True)
-            response = process_text(text_input=prompt)
         st.session_state.messages.append({"role": "assistant", "content": response})
 if __name__ == "__main__":

 def process_text(text_input):
     if text_input:
         st.session_state.messages.append({"role": "user", "content": text_input})
+        with st.chat_message("user"):
+            st.markdown(text_input)
         completion = client.chat.completions.create(model=MODEL, messages=[{"role": m["role"], "content": m["content"]} for m in st.session_state.messages], stream=False)
         return_text = completion.choices[0].message.content
+        with st.chat_message("assistant"):
+            st.markdown(return_text)
         filename = generate_filename(text_input, "md")
         create_file(filename, text_input, return_text)
         st.session_state.messages.append({"role": "assistant", "content": return_text})
+def process_text2(MODEL='gpt-4o-2024-05-13', text_input='What is 2+2 and what is an imaginary number'):
+    if text_input:
+        st.session_state.messages.append({"role": "user", "content": text_input})
+        completion = client.chat.completions.create(model=MODEL, messages=st.session_state.messages)
+        return_text = completion.choices[0].message.content
+        st.write("Assistant: " + return_text)
+        filename = generate_filename(text_input, "md")
+        create_file(filename, text_input, return_text, should_save=True)
+        return return_text
 def save_image(image_input, filename):
     with open(filename, "wb") as f:
         f.write(image_input.getvalue())
 def process_image(image_input):
     if image_input:
+        with st.chat_message("user"):
+            st.markdown('Processing image:  ' + image_input.name)
         base64_image = base64.b64encode(image_input.read()).decode("utf-8")
         st.session_state.messages.append({"role": "user", "content": [{"type": "text", "text": "Help me understand what is in this picture and list ten facts as markdown outline with appropriate emojis that describes what you see."}, {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{base64_image}"}}]})
         response = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, temperature=0.0)
         image_response = response.choices[0].message.content
+        with st.chat_message("assistant"):
+            st.markdown(image_response)
         filename_md, filename_img = generate_filename(image_input.name + '- ' + image_response, "md"), image_input.name
         create_file(filename_md, image_response, '', True)
         with open(filename_md, "w", encoding="utf-8") as f:
         transcription = client.audio.transcriptions.create(model="whisper-1", file=audio_input)
         response = client.chat.completions.create(model=MODEL, messages=[{"role": "system", "content":"You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."}, {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription.text}"}]}], temperature=0)
         audio_response = response.choices[0].message.content
+        with st.chat_message("assistant"):
+            st.markdown(audio_response)
         filename = generate_filename(transcription.text, "md")
         create_file(filename, transcription.text, audio_response, should_save=True)
         st.session_state.messages.append({"role": "assistant", "content": audio_response})
         st.session_state.messages.append({"role": "user", "content": ["These are the frames from the video.", *map(lambda x: {"type": "image_url", "image_url": {"url": f'data:image/jpg;base64,{x}', "detail": "low"}}, base64Frames), {"type": "text", "text": f"The audio transcription is: {transcript}"}]})
         response = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, temperature=0)
         video_response = response.choices[0].message.content
+        with st.chat_message("assistant"):
+            st.markdown(video_response)
         filename = generate_filename(transcript, "md")
         create_file(filename, transcript, video_response, should_save=True)
         st.session_state.messages.append({"role": "assistant", "content": video_response})
         transcription = client.audio.transcriptions.create(model="whisper-1", file=video_input)
         response = client.chat.completions.create(model=MODEL, messages=[{"role": "system", "content":"You are generating a transcript summary. Create a summary of the provided transcription. Respond in Markdown."}, {"role": "user", "content": [{"type": "text", "text": f"The audio transcription is: {transcription}"}]}], temperature=0)
         video_response = response.choices[0].message.content
+        with st.chat_message("assistant"):
+            st.markdown(video_response)
         filename = generate_filename(transcription, "md")
         create_file(filename, transcription, video_response, should_save=True)
         st.session_state.messages.append({"role": "assistant", "content": video_response})
     if prompt := st.chat_input("GPT-4o Multimodal ChatBot - What can I help you with?"):
         st.session_state.messages.append({"role": "user", "content": prompt})
+        with st.chat_message("user"):
+            st.markdown(prompt)
         with st.chat_message("assistant"):
             completion = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, stream=True)
+            response = process_text2(text_input=prompt)
         st.session_state.messages.append({"role": "assistant", "content": response})
     filename = save_and_play_audio(audio_recorder)
         transcript = transcribe_canary(filename)
         result = search_arxiv(transcript)
         st.session_state.messages.append({"role": "user", "content": transcript})
+        with st.chat_message("user"):
+            st.markdown(transcript)
         with st.chat_message("assistant"):
             completion = client.chat.completions.create(model=MODEL, messages=st.session_state.messages, stream=True)
+            response = process_text2(text_input=prompt)
         st.session_state.messages.append({"role": "assistant", "content": response})
 if __name__ == "__main__":