AnilNiraula committed
Commit ae72622 (verified) · Parent: d50fc60

Update app.py

Files changed (1)
  1. app.py +25 -16
app.py CHANGED
@@ -76,36 +76,45 @@ response_cache = {
         "4. Invest regularly using dollar-cost averaging.\n"
         "5. Diversify to manage risk.\n"
         "Consult a financial planner."
+    ),
+    "how to start investing": (
+        "Here’s how to start investing:\n"
+        "1. Study basics on Investopedia.\n"
+        "2. Open a brokerage account (e.g., Fidelity).\n"
+        "3. Deposit $100 or more after securing savings.\n"
+        "4. Buy an ETF like VOO after research.\n"
+        "5. Invest monthly with dollar-cost averaging.\n"
+        "Consult a financial planner."
     )
 }
 
 # Load model and tokenizer
-model_name = "distilgpt2"  # Smaller model for CPU
+model_name = "distilgpt2"
 try:
     logger.info(f"Loading tokenizer for {model_name}")
     tokenizer = AutoTokenizer.from_pretrained(model_name, clean_up_tokenization_spaces=False)
     logger.info(f"Loading model {model_name}")
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        torch_dtype=torch.float16,
-        low_cpu_mem_usage=True,
-        load_in_4bit=True  # 4-bit quantization
-    ).to(device)
+    with torch.no_grad():
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            low_cpu_mem_usage=True
+        ).to(device)
 except Exception as e:
     logger.error(f"Error loading model/tokenizer: {e}")
-    raise
+    raise RuntimeError(f"Failed to load model: {str(e)}")
 
 # Pre-tokenize prompt prefix
 prompt_prefix = (
-    "You are a financial advisor. Provide concise, numbered list advice for investing prompts. "
-    "Avoid repetition and vague statements.\n\n"
-    "Example: Q: Give investing tips\nA: 1. Open a brokerage account.\n2. Start with ETFs like VOO.\n3. Use dollar-cost averaging.\n\n"
+    "You are a financial advisor. Provide numbered list advice for investing prompts. "
+    "Avoid repetition.\n\n"
+    "Example: Q: Give investing tips\nA: 1. Open a brokerage.\n2. Buy ETFs like VOO.\n3. Use dollar-cost averaging.\n\n"
     "Q: "
 )
 prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
 
 # Fuzzy matching for cache
-def get_closest_cache_key(message, cache_keys, threshold=0.9):
+def get_closest_cache_key(message, cache_keys, threshold=0.85):
     matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=threshold)
     return matches[0] if matches else None
 
@@ -130,11 +139,11 @@ def chat_with_model(message, history=None):
     full_prompt = prompt_prefix + message + "\nA:"
     inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
 
-    # Generate response with mixed precision
+    # Generate response
     with torch.cpu.amp.autocast(), torch.no_grad():
         outputs = model.generate(
             **inputs,
-            max_new_tokens=80,
+            max_new_tokens=60,
             min_length=15,
             do_sample=True,
             temperature=0.7,
@@ -161,8 +170,8 @@ interface = gr.ChatInterface(
         "Hi, give me step-by-step investing advice",
         "Give me few investing idea",
         "Give me investing tips",
-        "Do you have a list of companies you recommend?",
-        "What's the difference between stocks and bonds?"
+        "How to start investing",
+        "Do you have a list of companies you recommend?"
     ]
 )
 
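Note on the model-loading change: dropping load_in_4bit=True is sensible for a CPU-only Space, since 4-bit loading goes through the GPU-oriented bitsandbytes backend; the new torch.no_grad() wrapper around from_pretrained is harmless but should have no real effect at load time. If smaller weights are still wanted on CPU, one alternative (a sketch, not what this commit does) is PyTorch dynamic int8 quantization:

# Sketch: CPU-side int8 quantization as an alternative to load_in_4bit.
# quantize_dynamic rewrites Linear layers to int8, which typically shrinks
# memory and speeds up CPU inference; the float16 override is left out,
# since half precision is often slower than float32 on CPU.
import torch
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("distilgpt2", low_cpu_mem_usage=True)
model = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)
model.eval()
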
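The fuzzy-cache cutoff drop from 0.9 to 0.85 widens which paraphrases hit a canned answer. A quick illustration with difflib (the message is a hypothetical user phrasing, not taken from the app):

import difflib

keys = ["how to start investing", "give me investing tips"]
msg = "how to start investing today"

# SequenceMatcher scores this pair at 0.88: it clears the new 0.85
# cutoff but would have been rejected by the old 0.9 one.
print(difflib.get_close_matches(msg, keys, n=1, cutoff=0.85))  # ['how to start investing']
print(difflib.get_close_matches(msg, keys, n=1, cutoff=0.9))   # []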
 
 
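One thing the diff leaves in place: prefix_tokens is computed once up front, but chat_with_model still re-tokenizes full_prompt from scratch on every call, so the pre-tokenization is not actually saving work yet. A sketch of reusing it by concatenating token ids (an assumed optimization, not part of this commit; GPT-2's tokenizer adds no special tokens by default, so plain concatenation is safe):

# Sketch: build generate() inputs from the cached prefix tokens
# instead of re-tokenizing prompt_prefix + message each call.
import torch

suffix = tokenizer(message + "\nA:", return_tensors="pt").to(device)
input_ids = torch.cat([prefix_tokens["input_ids"], suffix["input_ids"]], dim=1)
attention_mask = torch.ones_like(input_ids)
outputs = model.generate(input_ids=input_ids, attention_mask=attention_mask, max_new_tokens=60)
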
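On the generation change: cutting max_new_tokens from 80 to 60 trims worst-case latency roughly in proportion, since autoregressive decoding pays a per-token cost. (torch.cpu.amp.autocast() still works, though recent PyTorch spells it torch.amp.autocast("cpu").) A pattern worth pairing with the shorter budget, assuming the inputs/outputs names from the hunk above, is decoding only the newly generated tokens so the echoed prompt never leaks into the reply:

# Sketch: strip the prompt tokens before decoding the model output.
prompt_len = inputs["input_ids"].shape[1]
new_tokens = outputs[0][prompt_len:]
reply = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()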
 
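Finally, the new example prompt "How to start investing" lines up with the new cache key "how to start investing", which only pays off if the handler checks the cache before generating. A sketch of that cache-first path, assuming the names in this file (the .strip().lower() normalization is an assumption, since difflib comparisons are case-sensitive):

# Sketch: serve canned answers from the fuzzy cache before touching the model.
def respond(message):
    key = get_closest_cache_key(message.strip().lower(), list(response_cache.keys()))
    if key is not None:
        return response_cache[key]   # e.g. the new "how to start investing" entry
    return chat_with_model(message)  # fall back to distilgpt2 generation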