AnilNiraula committed
Commit 30ff85f · verified · 1 Parent(s): 475e80c

Update app.py

Files changed (1)
  1. app.py +27 -20
app.py CHANGED
@@ -34,17 +34,20 @@ response_cache = {
     )
 }
 
-# Load model
+# Load model with optimizations
 model_name = "distilgpt2"
 try:
     tokenizer = AutoTokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=False)
+    tokenizer.pad_token = tokenizer.eos_token  # Ensure pad token is set
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="auto",
-        torch_dtype=torch.float16
+        torch_dtype=torch.float16,
+        low_cpu_mem_usage=True  # Optimize memory usage
     ).to(device)
+    model.eval()  # Set model to evaluation mode for faster inference
 except Exception as e:
-    print(f"Error loading OPT-350m: {e}")
+    print(f"Error loading distilgpt2: {e}")
     exit()
 
 # Define chat function
@@ -52,22 +55,26 @@ def chat_with_model(message, history=None):  # Ignore history
     try:
         if not isinstance(message, str):
             return "Error: User input must be a string"
-        if message in response_cache:
-            return response_cache[message]
+        # Normalize message for cache lookup (case-insensitive, strip whitespace)
+        message = message.strip().lower()
+        for cached_message, response in response_cache.items():
+            if cached_message.lower() == message:
+                return response
+        # Simplified prompt
         full_prompt = (
-            "You are a financial advisor with expertise in stock market investments. "
-            "Provide accurate, detailed, and actionable advice in a single response. "
-            "Do not rely on prior conversation context. "
-            "If you cannot provide specific recommendations (e.g., individual companies), "
-            "explain why and offer general guidance or alternative suggestions.\n"
-            "User: {message}\nAssistant:"
-        ).format(message=message)
-        inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
-        outputs = model.generate(
-            **inputs,
-            max_new_tokens=30,
-            do_sample=False,  # Enables greedy decoding
-            pad_token_id=tokenizer.eos_token_id)
+            "Financial advisor: Answer directly about stock market investments. "
+            "No specific company picks without data; suggest ETFs or general advice. "
+            f"User: {message}\nAssistant:"
+        )
+        inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=256).to(device)
+        with torch.no_grad():  # Disable gradient computation for faster inference
+            outputs = model.generate(
+                **inputs,
+                max_new_tokens=50,  # Increased slightly for better responses
+                do_sample=False,  # Greedy decoding for speed
+                num_beams=1,  # Disable beam search for faster generation
+                pad_token_id=tokenizer.eos_token_id
+            )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
         return response[len(full_prompt):].strip() if response.startswith(full_prompt) else response
     except Exception as e:
@@ -76,8 +83,8 @@ def chat_with_model(message, history=None):  # Ignore history
 # Create Gradio interface
 interface = gr.ChatInterface(
     fn=chat_with_model,
-    title="Financial Advisor Chatbot (OPT-350m)",
-    description="Ask for advice on starting to invest in the stock market! Powered by Meta AI's OPT-350m. Provides single, direct answers without conversation history.",
+    title="Financial Advisor Chatbot (DistilGPT-2)",
+    description="Ask for advice on starting to invest in the stock market! Powered by DistilGPT-2. Provides single, direct answers without conversation history.",
     examples=[
         "Hi, pretend you are a financial advisor. Now tell me how can I start investing in stock market?",
         "You have a list of companies you recommend?"