# mangiucugna's picture
# fix gradio
# a285aa8
from openai import OpenAI
import gradio as gr
from pathlib import Path
import base64
# Instruction sent ahead of the transcript and the webcam frame.
_COACH_PROMPT = (
    "I am in a corporate conference call, give me some useful feedback on "
    "how to be more corporate using the following text and image. Be concise "
    "and don't provide general advice. If I use profanities point it out to "
    "me as something I shouldn't do in a corporate world. Use an extremely "
    "corporate, HR-like tone."
)


def call_gpt(
    client,
    text,
    image_b64,
    *,
    model="gpt-4-vision-preview",
    mime_type="image/jpeg",
):
    """Ask the chat model for feedback on a transcript plus one image.

    Args:
        client: Object exposing ``chat.completions.create`` (the caller in
            this file passes an ``OpenAI`` client).
        text: Text to be reviewed; sent as its own content part.
        image_b64: Base64-encoded image bytes, embedded in a ``data:`` URL.
        model: Model name forwarded to the API. Defaults to the value the
            function always used.
        mime_type: Media type written into the ``data:`` URL. Defaults to
            ``image/jpeg``; pass the real type when the image is not a JPEG.

    Returns:
        The ``content`` of the first choice's message.
    """
    image_url = f"data:{mime_type};base64,{image_b64}"
    user_message = {
        "role": "user",
        "content": [
            {"type": "text", "text": _COACH_PROMPT},
            {"type": "text", "text": text},
            {"type": "image_url", "image_url": {"url": image_url}},
        ],
    }
    response = client.chat.completions.create(
        model=model,
        messages=[user_message],
        max_tokens=4000,
    )
    return response.choices[0].message.content
def process(api_key, audio_file, webcam_file, chatbot):
    """Transcribe a recording, get feedback on it, and extend the chat history.

    Args:
        api_key: OpenAI API key used to build the client.
        audio_file: Path to the recorded audio; falsy when nothing was recorded.
        webcam_file: Path to the captured webcam frame; falsy when missing.
        chatbot: Chat history as a list of ``(user, bot)`` tuples. ``None`` is
            treated as an empty history.

    Returns:
        A ``(chatbot, None)`` tuple: the (possibly extended) history and
        ``None`` for the second output.

    Raises:
        ValueError: If both inputs are present but ``api_key`` is empty.
    """
    # Nothing to analyse until both a recording and a frame are available.
    if not audio_file or not webcam_file:
        return chatbot, None
    if not api_key:
        raise ValueError("API_KEY not set. Not gonna pay for you")
    # Guard against a missing history so the appends below cannot fail.
    if chatbot is None:
        chatbot = []

    client = OpenAI(api_key=api_key)
    transcript = client.audio.translations.create(
        model="whisper-1",
        file=Path(audio_file),
    )

    webcam_path = Path(webcam_file)
    image = base64.b64encode(webcam_path.read_bytes()).decode("utf-8")
    response = call_gpt(client, transcript.text, image)

    # First entry shows the frame that was analysed, second pairs the
    # transcript with the model's feedback.
    chatbot.append((f"![](/file={webcam_path})", None))
    chatbot.append((transcript.text, response))
    return chatbot, None
# Markdown shown at the top of the page. The emoji were mis-encoded in the
# original text and are restored to microphone + movie camera here.
WELCOME_TEXT = """
# Corporate Coach for your Conference Calls
A demo that uses GPT-4V with 🎤+🎥 to tell you how to be a better corporate employee
If you want more tips on how to be a good corporate employee visit www.stefanobaccianella.com
"""
# Custom stylesheet for the page: hides any <div> that is a direct child of
# .upload-container and itself has a direct child with class .uploading.
css = "\n".join(
    (
        "",
        ".upload-container > div:has(> .uploading) {",
        "display: none !important;",
        "}",
        "",
    )
)
# Page layout: welcome text on top, then a row holding the webcam feed next to
# a column with the API-key box, the chat history and the microphone recorder.
# NOTE(review): the nesting below is reconstructed; confirm it matches the
# intended layout.
with gr.Blocks(css=css) as demo:
    gr.Markdown(WELCOME_TEXT)
    with gr.Row():
        webcam = gr.Image(
            sources=["webcam"],
            streaming=True,
            type="filepath",
        )
        with gr.Column():
            api_key_textbox = gr.Textbox(
                label="OpenAI API KEY",
                type="password",
                value="",
            )
            chatbot = gr.Chatbot(height=500, bubble_full_width=False)
            audio = gr.Audio(sources=["microphone"], type="filepath")
    # Every change of the audio component runs process(); its two return
    # values are written back to the chat history and the audio component.
    audio.change(
        fn=process,
        inputs=[api_key_textbox, audio, webcam, chatbot],
        outputs=[chatbot, audio],
    )

if __name__ == "__main__":
    # show_error=True is passed so errors raised in handlers are displayed.
    demo.launch(show_error=True)