"""Gradio demo: a "corporate coach" that reviews a webcam frame and a voice clip.

The recorded audio is transcribed/translated with Whisper, then the transcript
and the current webcam frame are sent to a GPT-4 vision model, whose feedback
is appended to the chat history.
"""

import base64
import mimetypes
from pathlib import Path

import gradio as gr
from openai import OpenAI

# Instruction sent ahead of the user's transcript and image on every request.
COACH_PROMPT = (
    "I am in a corporate conference call, give me some useful feedback on "
    "how to be more corporate using the following text and image. "
    "Be concise and don't provide general advice. "
    "If I use profanities point it out to me as something I shouldn't do in "
    "a corporate world. Use an extremely corporate, HR-like tone."
)

# MIME type assumed when the image format cannot be derived from the file name.
DEFAULT_IMAGE_MIME = "image/jpeg"


def call_gpt(client, text: str, image_b64: str, mime_type: str = DEFAULT_IMAGE_MIME):
    """Ask the vision model for feedback on a transcript and an image.

    Args:
        client: An ``OpenAI`` client used to issue the chat completion.
        text: The user's transcribed speech.
        image_b64: The image, base64-encoded as text.
        mime_type: MIME type placed in the image data URL. Defaults to
            ``image/jpeg``, which is what this function always used before
            the parameter existed.

    Returns:
        The message content of the first completion choice.
    """
    image_url = f"data:{mime_type};base64,{image_b64}"
    response = client.chat.completions.create(
        model="gpt-4-vision-preview",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": COACH_PROMPT},
                    {"type": "text", "text": text},
                    {"type": "image_url", "image_url": {"url": image_url}},
                ],
            }
        ],
        max_tokens=4000,
    )
    return response.choices[0].message.content


def _guess_image_mime(image_path: Path) -> str:
    """Return the image MIME type implied by *image_path*'s suffix, else JPEG."""
    guessed, _ = mimetypes.guess_type(str(image_path))
    if guessed and guessed.startswith("image/"):
        return guessed
    return DEFAULT_IMAGE_MIME


def process(api_key, audio_file, webcam_file, chatbot):
    """Handle a new audio recording and append the model's feedback to the chat.

    Args:
        api_key: OpenAI API key typed into the UI.
        audio_file: Path of the recorded audio clip, or a falsy value.
        webcam_file: Path of the current webcam frame, or a falsy value.
        chatbot: Current chat history; new entries are appended to it.

    Returns:
        A ``(chatbot, None)`` pair: the chat history and ``None`` as the new
        value for the audio component wired as the second output.

    Raises:
        ValueError: If both inputs are present but no API key was supplied.
    """
    # Nothing to do until both an audio clip and a webcam frame are available.
    if not audio_file or not webcam_file:
        return chatbot, None

    if not api_key:
        raise ValueError("API_KEY not set. Not gonna pay for you")

    # Guard against a missing history so the appends below cannot fail.
    if chatbot is None:
        chatbot = []

    client = OpenAI(api_key=api_key)

    transcript = client.audio.translations.create(
        model="whisper-1",
        file=Path(audio_file),
    )

    webcam_path = Path(webcam_file)
    image_b64 = base64.b64encode(webcam_path.read_bytes()).decode("utf-8")

    # Label the data URL with the frame's real format instead of assuming JPEG.
    response = call_gpt(
        client,
        transcript.text,
        image_b64,
        mime_type=_guess_image_mime(webcam_path),
    )

    # First entry shows the frame that was analysed, second the exchange itself.
    chatbot.append((f"![](/file={webcam_path})", None))
    chatbot.append((transcript.text, response))
    return chatbot, None


WELCOME_TEXT = """
# Corporate Coach for your Conference Calls

A demo that uses GPT-4V with 🎤+🎥 to tell you how to be a better corporate employee

If you want more tips on how to be a good corporate employee visit www.stefanobaccianella.com
"""

css = """
.upload-container > div:has(> .uploading) {
    display: none !important;
}
"""

# NOTE(review): the nesting of the layout below was reconstructed from the
# statement order (webcam beside a column holding key, chat and microphone);
# confirm against the intended UI.
with gr.Blocks(css=css) as demo:
    gr.Markdown(WELCOME_TEXT)
    with gr.Row():
        webcam = gr.Image(sources=["webcam"], streaming=True, type="filepath")
        with gr.Column():
            api_key_textbox = gr.Textbox(
                label="OpenAI API KEY",
                type="password",
                value="",
            )
            chatbot = gr.Chatbot(height=500, bubble_full_width=False)
            audio = gr.Audio(sources=["microphone"], type="filepath")

    # Each change of the audio value triggers a review of the latest frame.
    audio.change(
        fn=process,
        inputs=[api_key_textbox, audio, webcam, chatbot],
        outputs=[chatbot, audio],
    )


if __name__ == "__main__":
    demo.launch(show_error=True)