pro-grammer committed on
Commit 1a5c0c8 · verified · 1 Parent(s): fce2a2c

Update app.py

Files changed (1)
  1. app.py +96 -38
app.py CHANGED
@@ -1,52 +1,110 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
-
- client = InferenceClient("pro-grammer/MindfulAI")
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     prompt = system_message.strip() + "\n\n"
-     for user_msg, assistant_msg in history:
-         if user_msg:
-             prompt += f"User: {user_msg.strip()}\n"
-         if assistant_msg:
-             prompt += f"Assistant: {assistant_msg.strip()}\n"
-     prompt += f"User: {message.strip()}\nAssistant:"
+ import torch
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # --- Model Initialization ---
+
+ # Paths for tokenizer and your model checkpoint
+ tokenizer_path = "facebook/opt-1.3b"
+ model_path = "transfer_learning_therapist.pth"
+
+ # Load tokenizer and set pad token if needed
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
+ if tokenizer.pad_token is None:
+     tokenizer.pad_token = tokenizer.eos_token
+
+ # Set device
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Load the base model and then update with your checkpoint
+ model = AutoModelForCausalLM.from_pretrained(tokenizer_path)
+ checkpoint = torch.load(model_path, map_location=device)
+ model_dict = model.state_dict()
+ pretrained_dict = {k: v for k, v in checkpoint['model_state_dict'].items() if k in model_dict}
+ model_dict.update(pretrained_dict)
+ model.load_state_dict(model_dict)
+ model.to(device)
+ model.eval()

-     response = ""
+ # --- Inference Function ---

-     for token in client.text_generation(
-         prompt,
+ def generate_response(prompt, max_new_tokens=150, temperature=0.7, top_p=0.9, repetition_penalty=1.2):
+     """Generates a response from your model based on the prompt."""
+     model.eval()
+     model.config.use_cache = True
+
+     prompt = prompt.strip()
+     if not prompt:
+         return "Please provide a valid input."
+
+     # Tokenize the input prompt
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+
+     try:
+         with torch.no_grad():
+             outputs = model.generate(
+                 inputs.input_ids,
+                 attention_mask=inputs.attention_mask,
+                 max_new_tokens=max_new_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 do_sample=True,
+                 pad_token_id=tokenizer.pad_token_id,
+                 eos_token_id=tokenizer.eos_token_id,
+                 repetition_penalty=repetition_penalty,
+                 num_beams=1, # greedy decoding
+                 no_repeat_ngram_size=3, # avoid repeated phrases
+             )
+     except Exception as e:
+         return f"Error generating response: {e}"
+     finally:
+         model.config.use_cache = False
+
+     full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     # If your prompt is formatted with role markers (e.g., "Therapist:"), extract only that part:
+     if "Therapist:" in full_response:
+         therapist_response = full_response.split("Therapist:")[-1].strip()
+     else:
+         therapist_response = full_response.strip()
+     return therapist_response
+
+ # --- Gradio Interface Function ---
+
+ def respond(message, history, system_message, max_tokens, temperature, top_p):
+     """
+     Build the conversation context by combining the system message and the dialogue history,
+     then generate a new response from the model.
+     """
+     # Create a conversation prompt with your desired role labels.
+     conversation = f"System: {system_message}\n"
+     for user_msg, assistant_msg in history:
+         conversation += f"Human: {user_msg}\nTherapist: {assistant_msg}\n"
+     conversation += f"Human: {message}\nTherapist:"
+
+     response = generate_response(
+         conversation,
          max_new_tokens=max_tokens,
-         stream=True,
          temperature=temperature,
          top_p=top_p,
-     ):
-         token_text = token.choices[0].text
-         response += token_text
-         yield response
+     )
+
+     history.append((message, response))
+     return history, history
+
+ # --- Gradio ChatInterface Setup ---

  demo = gr.ChatInterface(
-     respond,
+     fn=respond,
+     title="MindfulAI Chat",
+     description="Chat with MindfulAI – an AI Therapist powered by your custom model.",
      additional_inputs=[
-         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+         gr.Textbox(value="You are a friendly AI Therapist.", label="System message"),
+         gr.Slider(minimum=1, maximum=2048, value=150, step=1, label="Max new tokens"),
          gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(
-             minimum=0.1,
-             maximum=1.0,
-             value=0.95,
-             step=0.05,
-             label="Top-p (nucleus sampling)",
-         ),
+         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
      ],
  )

  if __name__ == "__main__":
-     demo.launch(share=True)
+     demo.launch()
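
For reference, the checkpoint-merging step this commit introduces (filtering checkpoint['model_state_dict'] against the base model's state_dict() before calling load_state_dict) can be sanity-checked outside the Space. Below is a minimal sketch of such a check, assuming the same checkpoint layout as app.py; the report_checkpoint_coverage helper is hypothetical and not part of the commit.

import torch
from transformers import AutoModelForCausalLM

def report_checkpoint_coverage(base_model_id: str, checkpoint_path: str) -> None:
    # Hypothetical helper: report how many checkpoint tensors line up with the base model.
    model = AutoModelForCausalLM.from_pretrained(base_model_id)
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    model_keys = set(model.state_dict().keys())
    ckpt_keys = set(checkpoint["model_state_dict"].keys())  # same layout app.py assumes
    matched = model_keys & ckpt_keys
    print(f"{len(matched)}/{len(model_keys)} base-model tensors found in the checkpoint")
    print(f"{len(ckpt_keys - model_keys)} checkpoint tensors have no counterpart and would be ignored")

if __name__ == "__main__":
    report_checkpoint_coverage("facebook/opt-1.3b", "transfer_learning_therapist.pth")

If the matched count is far below the total, the merge silently falls back to mostly base-model weights, which is worth knowing before launching the demo.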