# Earlier single-chat version, kept disabled for reference:
'''
import gradio as gr
from openai import OpenAI
import os
import time


def predict(message, history, system_prompt, model, max_tokens, temperature, top_p):
    # Initialize the OpenAI client
    client = OpenAI(
        api_key=os.environ.get("API_TOKEN"),
    )

    # Start with the system prompt
    messages = [{"role": "system", "content": system_prompt}]
    # Add the conversation history
    messages.extend(history if history else [])
    # Add the current user message
    messages.append({"role": "user", "content": message})

    # Record the start time
    start_time = time.time()

    # Streaming response
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stop=None,
        stream=True
    )

    full_message = ""
    first_chunk_time = None
    last_yield_time = None

    for chunk in response:
        if chunk.choices and chunk.choices[0].delta.content:
            if first_chunk_time is None:
                first_chunk_time = time.time() - start_time  # Record time to the first chunk
            full_message += chunk.choices[0].delta.content

            current_time = time.time()
            chunk_time = current_time - start_time  # Delay of this chunk relative to the request
            print(f"Message received {chunk_time:.2f} seconds after request: {chunk.choices[0].delta.content}")

            # Throttle UI updates to at most one every 0.25 s
            if last_yield_time is None or (current_time - last_yield_time >= 0.25):
                yield full_message
                last_yield_time = current_time

    # Yield any remaining text that didn't meet the time threshold
    if full_message:
        total_time = time.time() - start_time
        # Append timing information to the response message
        full_message += f" (First Chunk: {first_chunk_time:.2f}s, Total: {total_time:.2f}s)"
        yield full_message


gr.ChatInterface(
    fn=predict,
    type="messages",
    # save_history=True,
    # editable=True,
    additional_inputs=[
        gr.Textbox("You are a helpful AI assistant.", label="System Prompt"),
        gr.Dropdown(["gpt-4o", "gpt-4o-mini"], label="Model"),
        gr.Slider(800, 4000, value=2000, label="Max Tokens"),
        gr.Slider(0, 1, value=0.7, label="Temperature"),
        gr.Slider(0, 1, value=0.95, label="Top P"),
    ],
    css="footer{display:none !important}"
).launch()
'''

import gradio as gr
from openai import OpenAI
import os
import time
from PIL import Image
import requests
from io import BytesIO

# System prompts for each subject; each doubles as the system message for its tab's chat
prompts = {
    "History": "You are a professional history tutor. You explain historical events clearly and vividly. In addition to answering questions, you analyze potential difficulties the user may face and generate a thoughtful follow-up question.",
    "Writing": "You are a writing coach who helps users improve their writing skills. You offer revision suggestions, identify issues in expression, and generate a thought-provoking follow-up question.",
    "Computer Science": "You are a computer science tutor skilled at explaining algorithms, programming concepts, and system architecture. You assess user understanding and suggest a further exploratory question.",
    "Science": "You are a science tutor helping students understand concepts in physics, chemistry, or biology. You explain scientific principles and guide the student to think more deeply with a follow-up question.",
}


# Shared prediction generator
def predict(user_input, history, subject, model, max_tokens, temperature, top_p):
    system_prompt = prompts[subject]
    client = OpenAI(api_key=os.environ.get("API_TOKEN"))

    # Convert the chat history (a list of [user, assistant] pairs) into OpenAI format
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": user_input})

    start_time = time.time()
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True
    )

    full_message = ""
    first_chunk_time = None
    last_yield_time = None

    for chunk in response:
        content = chunk.choices[0].delta.content if chunk.choices and chunk.choices[0].delta.content else None
        if content:
            if first_chunk_time is None:
                first_chunk_time = time.time() - start_time
            full_message += content
            current_time = time.time()
            # Throttle UI updates to at most one every 0.25 s
            if last_yield_time is None or (current_time - last_yield_time >= 0.25):
                yield full_message
                last_yield_time = current_time

    if full_message:
        total_time = time.time() - start_time
        full_message += f" (First Chunk: {first_chunk_time:.2f}s, Total: {total_time:.2f}s)"
        yield full_message


# Generate an illustration for the given prompt (Images API of the openai>=1.0 client)
def generate_image(prompt, size="256x256"):
    client = OpenAI(api_key=os.environ.get("API_TOKEN"))
    response = client.images.generate(
        prompt=prompt[:1000],  # DALL·E 2 prompts are capped at 1000 characters
        n=1,
        size=size
    )
    image_url = response.data[0].url
    image_response = requests.get(image_url)
    image = Image.open(BytesIO(image_response.content))
    return image


# Gradio interface
with gr.Blocks(css="footer{display:none !important}") as demo:
    gr.Markdown("# 📚 Educational Learning Assistant")

    with gr.Tabs():
        for subject in prompts.keys():
            with gr.Tab(subject):
                chatbot = gr.Chatbot(label=f"{subject} Chat")
                user_input = gr.Textbox(label="Your Question")
                image_output = gr.Image(label="Illustration")
                with gr.Row():
                    model = gr.Dropdown(["gpt-4o", "gpt-4o-mini"], value="gpt-4o", label="Model")
                    max_tokens = gr.Slider(800, 4000, value=2000, label="Max Tokens")
                    temperature = gr.Slider(0, 1, value=0.7, label="Temperature")
                    top_p = gr.Slider(0, 1, value=0.95, label="Top P")
                state = gr.State([])

                # 👇 Use subject=subject to freeze its value in each loop iteration
                def wrapped_predict(message, history, model, max_tokens, temperature, top_p, subject=subject):
                    # Drain the streaming generator; only the final text is displayed
                    full_response = ""
                    for chunk in predict(message, history, subject, model, max_tokens, temperature, top_p):
                        full_response = chunk
                    # history is mutated in place, so the gr.State value persists across turns
                    history.append([message, full_response])
                    # Generate an image based on the latest assistant response
                    image = generate_image(full_response)
                    return history, "", image

                user_input.submit(
                    wrapped_predict,
                    inputs=[user_input, state, model, max_tokens, temperature, top_p],
                    outputs=[chatbot, user_input, image_output]
                )

demo.launch()
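
# Usage note (a sketch, not part of the original script): both the chat and
# image calls read the key from the API_TOKEN environment variable, so the app
# can be started with something like the following (the filename "app.py" is
# an assumption):
#
#   export API_TOKEN="sk-..."
#   python app.py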