Spaces:

mangiucugna
/

corporate-videocall-coach

Running

App Files Files Community

corporate-videocall-coach / app.py

mangiucugna

fix gradio

a285aa8 over 1 year ago

raw

history blame contribute delete

2.96 kB

	from openai import OpenAI
	import gradio as gr
	from pathlib import Path
	import base64

	def _sniff_image_mime(image_b64, default="image/jpeg"):
	    """Guess an image's MIME type from the first bytes of its base64 payload.

	    Returns ``default`` when the payload is too short, is not valid base64,
	    or does not start with one of the signatures checked below.
	    """
	    try:
	        # 16 base64 characters decode to exactly 12 bytes, which covers every
	        # signature below without decoding the whole image.
	        head = base64.b64decode(image_b64[:16])
	    except (TypeError, ValueError):
	        return default
	    if head.startswith(b"\xff\xd8\xff"):
	        return "image/jpeg"
	    if head.startswith(b"\x89PNG\r\n\x1a\n"):
	        return "image/png"
	    if head.startswith((b"GIF87a", b"GIF89a")):
	        return "image/gif"
	    if head[:4] == b"RIFF" and head[8:12] == b"WEBP":
	        return "image/webp"
	    return default


	def call_gpt(client, text, image_b64, model="gpt-4-vision-preview"):
	    """Ask a vision-capable chat model for corporate-style feedback.

	    A single user message is sent with three parts: the fixed coaching
	    prompt, the caller's ``text``, and the image embedded as a base64 data
	    URL.

	    Args:
	        client: Object exposing ``chat.completions.create`` (an ``OpenAI``
	            client in this file).
	        text: Text to be reviewed; ``process`` passes the audio transcript.
	        image_b64: Base64-encoded image bytes, as ``str``.
	        model: Chat model name. Defaults to the value this function has
	            always used.
	            NOTE(review): confirm this preview model is still served by the
	            API; callers can now override it without editing this function.

	    Returns:
	        The content of the first choice's message in the response.
	    """
	    prompt = "I am in a corporate conference call, give me some useful feedback on how to be more corporate using the following text and image. Be concise and don't provide general advice. If I use profanities point it out to me as something I shouldn't do in a corporate world. Use an extremely corporate, HR-like tone."
	    # Label the data URL with the image's actual type instead of always
	    # claiming JPEG: the caller encodes whatever file it was handed.
	    mime_type = _sniff_image_mime(image_b64)
	    response = client.chat.completions.create(
	        model=model,
	        messages=[
	            {
	                "role": "user",
	                "content": [
	                    {
	                        "type": "text",
	                        "text": prompt,
	                    },
	                    {
	                        "type": "text",
	                        "text": text,
	                    },
	                    {
	                        "type": "image_url",
	                        "image_url": {
	                            "url": f"data:{mime_type};base64,{image_b64}"
	                        },
	                    },
	                ],
	            }
	        ],
	        max_tokens=4000,
	    )
	    return response.choices[0].message.content

	def process(api_key, audio_file, webcam_file, chatbot):
	    """Transcribe a recording, get feedback on it, and extend the chat history.

	    The audio file is sent to the ``audio.translations`` endpoint with the
	    ``whisper-1`` model, the webcam file is base64-encoded, and both are
	    handed to ``call_gpt``. Two entries are appended to ``chatbot``: a
	    markdown image pointing at the webcam file, then the transcript paired
	    with the model's reply.

	    Args:
	        api_key: OpenAI API key typed by the user.
	        audio_file: Path of the recorded audio, or a falsy value if none.
	        webcam_file: Path of the captured image, or a falsy value if none.
	        chatbot: Chat history as a list of ``(user, bot)`` tuples; ``None``
	            is treated as an empty history.

	    Returns:
	        ``(chatbot, None)``. The event wiring in this file maps the second
	        element to the audio component.

	    Raises:
	        ValueError: If both inputs are present but the API key is missing
	            or blank.
	    """
	    # A missing history must not crash the appends further down.
	    if chatbot is None:
	        chatbot = []
	    # Nothing to analyse until both a recording and a frame exist.
	    if not audio_file or not webcam_file:
	        return chatbot, None
	    # A key that is only whitespace is as unusable as no key; reject it here
	    # instead of letting the remote call fail later.
	    api_key = (api_key or "").strip()
	    if not api_key:
	        raise ValueError("API_KEY not set. Not gonna pay for you")
	    client = OpenAI(api_key=api_key)
	    audio_path = Path(audio_file)
	    transcript = client.audio.translations.create(
	        model="whisper-1",
	        file=audio_path,
	    )
	    webcam_path = Path(webcam_file)
	    image = base64.b64encode(webcam_path.read_bytes()).decode("utf-8")
	    response = call_gpt(client, transcript.text, image)
	    # First the captured frame, then the transcript with the model's reply.
	    chatbot.append((f"![](/file={webcam_path})", None))
	    chatbot.append((transcript.text, response))
	    return chatbot, None


	# Markdown source for the page header; rendered with gr.Markdown when the
	# UI is built further down in this file.
	WELCOME_TEXT = """
	# Corporate Coach for your Conference Calls

	A demo that uses GPT-4V with 🎤+🎥 to tell you how to be a better corporate employee

	If you want more tips on how to be a good corporate employee visit www.stefanobaccianella.com
	"""

	# Extra stylesheet passed to gr.Blocks(css=...). The single rule sets
	# `display: none` on any direct child <div> of `.upload-container` that
	# itself has a direct `.uploading` child.
	# NOTE(review): presumably this hides Gradio's upload-progress overlay --
	# confirm against the rendered DOM.
	css = """
	.upload-container > div:has(> .uploading) {
	display: none !important;
	}
	"""

	# Build the page: header markdown, a webcam image input, and a column with
	# the API-key box, the chat history and the microphone recorder.
	# NOTE(review): the source's indentation was lost, so the Row/Column nesting
	# below is reconstructed -- confirm it matches the intended layout.
	with gr.Blocks(css=css) as demo:
	    gr.Markdown(WELCOME_TEXT)
	    with gr.Row():
	        webcam = gr.Image(
	            sources=["webcam"],
	            streaming=True,
	            type="filepath",
	        )
	        with gr.Column():
	            api_key_textbox = gr.Textbox(
	                label="OpenAI API KEY",
	                type="password",
	                value="",
	            )
	            chatbot = gr.Chatbot(
	                height=500,
	                bubble_full_width=False,
	            )
	            audio = gr.Audio(
	                sources=["microphone"],
	                type="filepath",
	            )
	    # Run `process` whenever the audio component's value changes; its two
	    # return values are written back to the chatbot and the audio component.
	    audio.change(
	        fn=process,
	        inputs=[api_key_textbox, audio, webcam, chatbot],
	        outputs=[chatbot, audio],
	    )

	# Start the app only when this file is executed directly.
	if __name__ == "__main__":
	    demo.launch(show_error=True)