Update app.py
app.py CHANGED
@@ -26,20 +26,22 @@ def query_model(prompt, model_endpoint, system_prompt):
         "Content-Type": "application/json",
         "Accept": "application/json"
     }
-    #
+    # Format the prompt to include the system instruction and structure the conversation.
     formatted_prompt = f"{system_prompt}\nUser: {prompt}\nAssistant:"
 
+    # Include the stop sequence so generation halts when the next user turn starts.
     data = {
         "inputs": formatted_prompt,
         "parameters": {
             "max_new_tokens": 512,
-            "temperature": 0.6,
+            "temperature": 0.6,
+            "stop_sequences": ["\nUser:"]
         }
     }
 
     response = requests.post(model_endpoint, headers=headers, json=data)
 
-    # Uncomment the
+    # Uncomment the next line to print raw API responses for debugging.
     # print("Raw response:", response.text)
 
     try:
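For reference, a minimal standalone sketch of the request this hunk now builds. The endpoint URL and the HF_TOKEN environment variable are placeholder assumptions, not values from this Space; the payload mirrors the parameters added above.

```python
# Hypothetical standalone version of the request; MODEL_ENDPOINT and
# HF_TOKEN are placeholders, not values taken from this Space.
import os

import requests

MODEL_ENDPOINT = "https://api-inference.huggingface.co/models/some-org/some-model"

headers = {
    "Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}",
    "Content-Type": "application/json",
    "Accept": "application/json",
}

formatted_prompt = "You are a helpful assistant.\nUser: Hello!\nAssistant:"

data = {
    "inputs": formatted_prompt,
    "parameters": {
        "max_new_tokens": 512,
        "temperature": 0.6,
        # Generation halts as soon as the model starts the next user turn.
        "stop_sequences": ["\nUser:"],
    },
}

response = requests.post(MODEL_ENDPOINT, headers=headers, json=data)
print(response.json())
```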
@@ -51,7 +53,10 @@ def query_model(prompt, model_endpoint, system_prompt):
         return f"Error: {result['error']}"
 
     try:
-
+        generated_text = result[0].get("generated_text", "No generated_text found in response")
+        # Optionally, strip off the prompt if needed:
+        # generated_text = generated_text[len(formatted_prompt):].strip()
+        return generated_text
     except Exception:
         return f"Error: Unexpected response format: {json.dumps(result)}"
 
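The new parsing assumes the usual text-generation response shape: a list holding one dict with a `generated_text` key, which may echo the prompt back. A small sketch against a mocked result (the sample strings below are illustrative, not actual model output):

```python
# Mocked response in the shape the new code expects; the strings are
# illustrative, not real API output.
formatted_prompt = "You are a helpful assistant.\nUser: Hi\nAssistant:"
result = [{"generated_text": formatted_prompt + " Hello! How can I help you today?"}]

generated_text = result[0].get("generated_text", "No generated_text found in response")
# Models that echo the prompt return it verbatim at the start, so slicing
# it off leaves only the assistant's reply.
reply = generated_text[len(formatted_prompt):].strip()
print(reply)  # Hello! How can I help you today?
```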
@@ -66,7 +71,7 @@ def chat_with_models(user_input, history):
     return history, history
 
 with gr.Blocks() as demo:
-    gr.Markdown("# Multi-LLM Chatbot using Hugging Face Inference API")
+    gr.Markdown("# Multi-LLM Chatbot using Hugging Face Inference API with Stop Sequences")
     chatbot = gr.Chatbot()
     msg = gr.Textbox(label="Your Message")
     clear = gr.Button("Clear")
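For context, a hedged sketch of how the widgets in this hunk are typically wired inside the same gr.Blocks; the event bindings and the stubbed chat_with_models below are assumptions about the unchanged parts of app.py, not lines from this diff.

```python
import gradio as gr

def chat_with_models(user_input, history):
    # Stub standing in for the real function, which calls query_model;
    # it keeps the same (history, history) return shape as the diff.
    history = history or []
    history.append((user_input, "stub reply"))
    return history, history

with gr.Blocks() as demo:
    gr.Markdown("# Multi-LLM Chatbot using Hugging Face Inference API with Stop Sequences")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Your Message")
    clear = gr.Button("Clear")
    state = gr.State([])

    # Assumed wiring: submitting the textbox sends the message, Clear
    # resets both the chat window and the stored history.
    msg.submit(chat_with_models, [msg, state], [chatbot, state])
    clear.click(lambda: ([], []), None, [chatbot, state])

demo.launch()
```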