AnilNiraula committed on
Commit
4cf4d11
·
verified ·
1 Parent(s): ff9da77

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -8
app.py CHANGED
@@ -1,4 +1,3 @@
1
- #Loading packages
2
  import logging
3
  import os
4
  import time
@@ -170,7 +169,7 @@ response_cache = {
170
  "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
171
  ),
172
  "what was the s&p 500 return in 2022?": (
173
- "The S&P 500 returned approximately -18.1% in 2022, impacted by high inflation and interest rate hikes."
174
  ),
175
  "what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
176
  "The S&P 500’s average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
@@ -196,6 +195,9 @@ response_cache = {
196
  ),
197
  "what is the average return rate of the s&p 500 in the past 10 years?": (
198
  "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
 
 
 
199
  )
200
  }
201
 
@@ -244,23 +246,32 @@ prompt_prefix = (
244
  "1. This uses the historical average return of 10–12% (1927–2025).\n"
245
  "2. Future returns vary and are not guaranteed.\n\n"
246
  "Example 3:\n"
247
- "Q: What was the average annual return of the S&P 500 between 2010 and 2020?\n"
248
- "A: The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends.\n"
249
- "1. This period includes strong recovery post-financial crisis.\n"
250
  "2. Dividends contribute significantly to total returns.\n\n"
251
  "Q: "
252
  )
253
  prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
254
 
255
- # Substring matching for cache with fuzzy matching
256
  def get_closest_cache_key(message, cache_keys):
257
  message = message.lower().strip()
 
 
 
 
 
 
 
 
 
258
  matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=0.8)
259
  return matches[0] if matches else None
260
 
261
  # Parse period from user input
262
  def parse_period(query):
263
- # Match specific year ranges (e.g., "between 2010 and 2020", "2000–2008")
264
  match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
265
  if match:
266
  start_year, end_year = map(int, match.groups())
@@ -402,7 +413,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
402
  gen_start_time = time.time()
403
  outputs = model.generate(
404
  **inputs,
405
- max_new_tokens=40, # Reduced for faster inference
406
  min_length=20,
407
  do_sample=False,
408
  repetition_penalty=2.0,
 
 
1
  import logging
2
  import os
3
  import time
 
169
  "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
170
  ),
171
  "what was the s&p 500 return in 2022?": (
172
+ "The S&P 500 returned approximately -19.4% in 2022, impacted by high inflation and interest rate hikes."
173
  ),
174
  "what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
175
  "The S&P 500’s average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
 
195
  ),
196
  "what is the average return rate of the s&p 500 in the past 10 years?": (
197
  "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
198
+ ),
199
+ "what was the average annual return of the s&p 500 between 2020 and 2022?": (
200
+ "The S&P 500’s average annual return from 2020 to 2022 was approximately 9.0%, including dividends, with significant volatility due to the COVID-19 recovery and 2022 bear market."
201
  )
202
  }
203
 
 
246
  "1. This uses the historical average return of 10–12% (1927–2025).\n"
247
  "2. Future returns vary and are not guaranteed.\n\n"
248
  "Example 3:\n"
249
+ "Q: What was the average annual return of the S&P 500 between 2020 and 2022?\n"
250
+ "A: The S&P 500’s average annual return from 2020 to 2022 was approximately 9.0%, including dividends.\n"
251
+ "1. This period includes the COVID-19 recovery (16.3% in 2020, 26.9% in 2021) and a bear market (-19.4% in 2022).\n"
252
  "2. Dividends contribute significantly to total returns.\n\n"
253
  "Q: "
254
  )
255
  prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
256
 
257
+ # Substring matching for cache with exact year matching
258
  def get_closest_cache_key(message, cache_keys):
259
  message = message.lower().strip()
260
+ # Extract years from the query
261
+ year_match = re.search(r'(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', message)
262
+ if year_match:
263
+ start_year, end_year = year_match.groups()
264
+ # Prioritize exact year matches in cache
265
+ for key in cache_keys:
266
+ if f"{start_year} and {end_year}" in key or f"{start_year} to {end_year}" in key or f"{start_year}–{end_year}" in key:
267
+ return key
268
+ # Fallback to fuzzy matching
269
  matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=0.8)
270
  return matches[0] if matches else None
271
 
272
  # Parse period from user input
273
  def parse_period(query):
274
+ # Match specific year ranges (e.g., "between 2020 and 2022", "2020–2022")
275
  match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
276
  if match:
277
  start_year, end_year = map(int, match.groups())
 
413
  gen_start_time = time.time()
414
  outputs = model.generate(
415
  **inputs,
416
+ max_new_tokens=40,
417
  min_length=20,
418
  do_sample=False,
419
  repetition_penalty=2.0,