Spaces:

beyoru
/

qew

Running

App Files Files Community

beyoru committed on Jan 16

Commit

b0a1757

verified ·

1 Parent(s): da212c2

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -11

app.py CHANGED Viewed

@@ -2,10 +2,9 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import numpy as np
-import string
 from huggingface_hub import InferenceClient
-# Initialize Inference Client for the model (Ensure you have the correct model ID)
 client = InferenceClient("Qwen/Qwen2.5-3B-Instruct")
 # Load tokenizer and model for EOU detection
@@ -42,6 +41,7 @@ def respond(
     max_tokens,
     temperature,
     top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
@@ -53,8 +53,10 @@ def respond(
     messages.append({"role": "user", "content": message})
-    # Get the response from the Qwen model (e.g., for conversation generation)
     response = ""
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
@@ -64,14 +66,28 @@ def respond(
     ):
         token = message.choices[0].delta.content
         response += token
-        yield response
-    # After generating the response, get the EOU probability
-    eou_probability = get_eou_probability(messages)  # Get EOU prediction
-    print(f"EOU Probability: {eou_probability}")
-    # Include the EOU probability in the output
-    yield f"\nEOU Probability: {eou_probability:.2f}"
 # Gradio interface setup
 demo = gr.ChatInterface(
@@ -87,9 +103,15 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
     ],
 )
 # Launch Gradio with public link sharing
-demo.launch(share=True)

 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 import numpy as np
 from huggingface_hub import InferenceClient
+# Initialize Inference Client for the model (ensure you have the correct model ID)
 client = InferenceClient("Qwen/Qwen2.5-3B-Instruct")
 # Load tokenizer and model for EOU detection
     max_tokens,
     temperature,
     top_p,
+    eou_threshold: float = 0.9  # Probability threshold to stop or transition the conversation
 ):
     messages = [{"role": "system", "content": system_message}]
     messages.append({"role": "user", "content": message})
     response = ""
+    interruption_detected = False
+    # Streaming model response while checking for EOU
     for message in client.chat_completion(
         messages,
         max_tokens=max_tokens,
     ):
         token = message.choices[0].delta.content
         response += token
+        # Check for EOU probability after each response chunk
+        chat_ctx = [{"role": "user", "content": message} for message in history]
+        chat_ctx.append({"role": "assistant", "content": response})
+        eou_probability = get_eou_probability(chat_ctx)
+        # If EOU probability is above the threshold, consider it an interruption or turn end
+        if eou_probability > eou_threshold:
+            interruption_detected = True
+            break  # Stop the response generation if EOU is high
+        yield response  # Continue yielding the response as it's generated
+    if interruption_detected:
+        # If EOU is high, we stop the assistant response early and handle it
+        yield f"\nAssistant detected an interruption or end of turn. EOU Probability: {eou_probability:.2f}"
+    # No interruption: yield the full accumulated response once streaming has finished
+    if not interruption_detected:
+        yield response
 # Gradio interface setup
 demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
+        gr.Slider(
+            minimum=0.0,
+            maximum=1.0,
+            value=0.9,
+            step=0.01,
+            label="EOU Probability Threshold"
+        ),
     ],
 )
 # Launch Gradio with public link sharing
+demo.launch(share=True)