Spaces:

AnilNiraula
/

FinChat

Running

App Files Files Community

AnilNiraula committed on Jul 8

Commit

4cf4d11

verified ·

1 Parent(s): ff9da77

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -8

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-#Loading packages
 import logging
 import os
 import time
@@ -170,7 +169,7 @@ response_cache = {
         "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
     ),
     "what was the s&p 500 return in 2022?": (
-        "The S&P 500 returned approximately -18.1% in 2022, impacted by high inflation and interest rate hikes."
     ),
     "what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
         "The S&P 500’s average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
@@ -196,6 +195,9 @@ response_cache = {
     ),
     "what is the average return rate of the s&p 500 in the past 10 years?": (
         "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
     )
 }
@@ -244,23 +246,32 @@ prompt_prefix = (
     "1. This uses the historical average return of 10–12% (1927–2025).\n"
     "2. Future returns vary and are not guaranteed.\n\n"
     "Example 3:\n"
-    "Q: What was the average annual return of the S&P 500 between 2010 and 2020?\n"
-    "A: The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends.\n"
-    "1. This period includes strong recovery post-financial crisis.\n"
     "2. Dividends contribute significantly to total returns.\n\n"
     "Q: "
 )
 # Tokenize the few-shot prompt prefix once at module level, asking for "pt"
 # tensors, and move the result to `device`.
 # NOTE(review): truncation=True with max_length=512 would silently cut the
 # prefix if it exceeds 512 tokens - confirm the prompt still fits.
 prefix_tokens = tokenizer(
     prompt_prefix,
     return_tensors="pt",
     truncation=True,
     max_length=512,
 ).to(device)
-# Substring matching for cache with fuzzy matching
 def get_closest_cache_key(message, cache_keys):
     """Return the cache key most similar to ``message``, or ``None``.

     The message is lower-cased and stripped, then compared against
     ``cache_keys`` with ``difflib.get_close_matches``; only the single best
     candidate with a similarity of at least 0.8 is accepted.
     """
     normalized = message.lower().strip()
     best = difflib.get_close_matches(normalized, cache_keys, n=1, cutoff=0.8)
     if not best:
         return None
     return best[0]
 # Parse period from user input
 def parse_period(query):
-    # Match specific year ranges (e.g., "between 2010 and 2020", "2000–2008")
     match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
     if match:
         start_year, end_year = map(int, match.groups())
@@ -402,7 +413,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
             gen_start_time = time.time()
             outputs = model.generate(
                 **inputs,
-                max_new_tokens=40,  # Reduced for faster inference
                 min_length=20,
                 do_sample=False,
                 repetition_penalty=2.0,

 import logging
 import os
 import time
         "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
     ),
     "what was the s&p 500 return in 2022?": (
+        "The S&P 500 returned approximately -19.4% in 2022, impacted by high inflation and interest rate hikes."
     ),
     "what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
         "The S&P 500’s average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
     ),
     "what is the average return rate of the s&p 500 in the past 10 years?": (
         "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
+    ),
+    "what was the average annual return of the s&p 500 between 2020 and 2022?": (
+        "The S&P 500’s average annual return from 2020 to 2022 was approximately 9.0%, including dividends, with significant volatility due to the COVID-19 recovery and 2022 bear market."
     )
 }
     "1. This uses the historical average return of 10–12% (1927–2025).\n"
     "2. Future returns vary and are not guaranteed.\n\n"
     "Example 3:\n"
+    "Q: What was the average annual return of the S&P 500 between 2020 and 2022?\n"
+    "A: The S&P 500’s average annual return from 2020 to 2022 was approximately 9.0%, including dividends.\n"
+    "1. This period includes the COVID-19 recovery (16.3% in 2020, 26.9% in 2021) and a bear market (-19.4% in 2022).\n"
     "2. Dividends contribute significantly to total returns.\n\n"
     "Q: "
 )
 # Tokenize the few-shot prompt prefix once at module level, asking for "pt"
 # tensors, and move the result to `device`.
 # NOTE(review): truncation=True with max_length=512 would silently cut the
 # prefix if it exceeds 512 tokens - confirm the prompt still fits.
 prefix_tokens = tokenizer(
     prompt_prefix,
     return_tensors="pt",
     truncation=True,
     max_length=512,
 ).to(device)
+# Cache lookup: prefer keys containing the query's exact year range, then fall back to fuzzy matching
 def get_closest_cache_key(message, cache_keys):
     """Return the cache key that best answers ``message``, or ``None``.

     Lookup happens in two stages:

     1. If the message contains a year range ("2020 and 2022", "2020 to 2022",
        "2020-2022", "2020–2022"), keys that contain the same range are
        preferred. When several keys share the range, the one whose wording
        is closest to the message wins (ties keep the earliest key).
     2. Otherwise a fuzzy match (similarity >= 0.8) is attempted, but only
        against keys that mention exactly the same years as the message.
        Without this guard a question about 2010-2020 would score ~0.97
        against a cached 2020-2022 question and return the wrong figures.

     Args:
         message: Raw user question; it is lower-cased and stripped here.
         cache_keys: Iterable of cached question strings (lower-case).

     Returns:
         The matching key, or ``None`` when nothing suitable is cached.
     """
     message = message.lower().strip()
     # Materialise once: the keys are scanned more than once below.
     keys = list(cache_keys)
     # Extract a year range from the query
     year_match = re.search(r'(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', message)
     if year_match:
         start_year, end_year = year_match.groups()
         spellings = (
             f"{start_year} and {end_year}",
             f"{start_year} to {end_year}",
             f"{start_year}–{end_year}",
         )
         candidates = [
             key for key in keys
             if any(spelling in key for spelling in spellings)
         ]
         if candidates:
             # Same years everywhere, so let the wording pick the winner.
             return max(
                 candidates,
                 key=lambda key: difflib.SequenceMatcher(None, key, message).ratio(),
             )
     # Fallback to fuzzy matching, restricted to keys about the same years so
     # a near-identical question for another period is never served.
     year_pattern = r'(?<!\d)(?:19|20)\d{2}(?!\d)'
     message_years = set(re.findall(year_pattern, message))
     same_year_keys = [
         key for key in keys
         if set(re.findall(year_pattern, key)) == message_years
     ]
     matches = difflib.get_close_matches(message, same_year_keys, n=1, cutoff=0.8)
     return matches[0] if matches else None
 # Parse period from user input
 def parse_period(query):
+    # Match specific year ranges (e.g., "between 2020 and 2022", "2020–2022")
     match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
     if match:
         start_year, end_year = map(int, match.groups())
             gen_start_time = time.time()
             outputs = model.generate(
                 **inputs,
+                max_new_tokens=40,
                 min_length=20,
                 do_sample=False,
                 repetition_penalty=2.0,