youzhang committed
Commit 54dd47c · verified · 1 Parent(s): 4fd622d

Upload app.py

Files changed (1)
app.py +71 -4
app.py CHANGED
@@ -1,7 +1,74 @@
  import gradio as gr
+ from openai import OpenAI
+ import os
+ import time

- def greet(name):
-     return "Hello " + name + "!!"
+ def predict(message, history, system_prompt, model, max_tokens, temperature, top_p):

- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
- demo.launch()
+     # Initialize the OpenAI client
+     client = OpenAI(
+         api_key=os.environ.get("API_TOKEN"),
+     )
+
+     # Start with the system prompt
+     messages = [{"role": "system", "content": system_prompt}]
+
+     # Add the conversation history
+     messages.extend(history if history else [])
+
+     # Add the current user message
+     messages.append({"role": "user", "content": message})
+
+     # Record the start time
+     start_time = time.time()
+
+     # Request a streaming chat completion
+     response = client.chat.completions.create(
+         model=model,
+         messages=messages,
+         max_tokens=max_tokens,
+         temperature=temperature,
+         top_p=top_p,
+         stop=None,
+         stream=True
+     )
+
+     full_message = ""
+     first_chunk_time = None
+     last_yield_time = None
+
+     for chunk in response:
+         if chunk.choices and chunk.choices[0].delta.content:
+             if first_chunk_time is None:
+                 first_chunk_time = time.time() - start_time  # Record the time to the first chunk
+
+             full_message += chunk.choices[0].delta.content
+             current_time = time.time()
+             chunk_time = current_time - start_time  # Delay of this chunk relative to the request
+             print(f"Message received {chunk_time:.2f} seconds after request: {chunk.choices[0].delta.content}")
+
+             # Throttle UI updates to at most one yield every 0.25 s
+             if last_yield_time is None or (current_time - last_yield_time >= 0.25):
+                 yield full_message
+                 last_yield_time = current_time
+
+     # Yield any remaining text that did not meet the time threshold
+     if full_message:
+         total_time = time.time() - start_time
+         # Append timing information to the response message
+         full_message += f" (First Chunk: {first_chunk_time:.2f}s, Total: {total_time:.2f}s)"
+         yield full_message
+
+ gr.ChatInterface(
+     fn=predict,
+     type="messages",
+     # save_history=True,
+     # editable=True,
+     additional_inputs=[
+         gr.Textbox("You are a helpful AI assistant.", label="System Prompt"),
+         gr.Dropdown(["gpt-4o", "gpt-4o-mini"], label="Model"),
+         gr.Slider(800, 4000, value=2000, label="Max Tokens"),
+         gr.Slider(0, 1, value=0.7, label="Temperature"),
+         gr.Slider(0, 1, value=0.95, label="Top P"),
+     ],
+     css="footer{display:none !important}"
+ ).launch()
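
Because the ChatInterface is created with type="messages", Gradio passes history to predict as a list of OpenAI-style role/content dicts, which is why the function can extend its OpenAI message list with the history directly. A minimal standalone sketch of that transformation, using a hypothetical two-turn history that is not part of this commit:

system_prompt = "You are a helpful AI assistant."
history = [
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello! How can I help?"},
]
message = "What does this Space do?"

# Same construction predict() performs before calling the OpenAI API
messages = [{"role": "system", "content": system_prompt}]
messages.extend(history if history else [])
messages.append({"role": "user", "content": message})

print(messages)  # ready to pass as messages= to client.chat.completions.create(...)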