Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,3 @@
|
|
1 |
-
#Loading packages
|
2 |
import logging
|
3 |
import os
|
4 |
import time
|
@@ -170,7 +169,7 @@ response_cache = {
|
|
170 |
"The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
|
171 |
),
|
172 |
"what was the s&p 500 return in 2022?": (
|
173 |
-
"The S&P 500 returned approximately -
|
174 |
),
|
175 |
"what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
|
176 |
"The S&P 500βs average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
|
@@ -196,6 +195,9 @@ response_cache = {
|
|
196 |
),
|
197 |
"what is the average return rate of the s&p 500 in the past 10 years?": (
|
198 |
"The S&P 500βs average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
|
|
|
|
|
|
|
199 |
)
|
200 |
}
|
201 |
|
@@ -244,23 +246,32 @@ prompt_prefix = (
|
|
244 |
"1. This uses the historical average return of 10β12% (1927β2025).\n"
|
245 |
"2. Future returns vary and are not guaranteed.\n\n"
|
246 |
"Example 3:\n"
|
247 |
-
"Q: What was the average annual return of the S&P 500 between
|
248 |
-
"A: The S&P 500βs average annual return from
|
249 |
-
"1. This period includes
|
250 |
"2. Dividends contribute significantly to total returns.\n\n"
|
251 |
"Q: "
|
252 |
)
|
253 |
prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
|
254 |
|
255 |
-
# Fuzzy matching of the user query against the cache keys
|
256 |
def get_closest_cache_key(message, cache_keys):
    """Return the cache key most similar to *message*, or ``None``.

    The message is lower-cased and stripped of surrounding whitespace, then
    compared against ``cache_keys`` with ``difflib.get_close_matches``
    (at most one result, similarity cutoff 0.8).
    """
    normalized = message.lower().strip()
    best = difflib.get_close_matches(normalized, cache_keys, n=1, cutoff=0.8)
    if not best:
        return None
    return best[0]
|
260 |
|
261 |
# Parse period from user input
|
262 |
def parse_period(query):
|
263 |
-
# Match specific year ranges (e.g., "between
|
264 |
match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|β)\s*(\d{4})', query, re.IGNORECASE)
|
265 |
if match:
|
266 |
start_year, end_year = map(int, match.groups())
|
@@ -402,7 +413,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
|
|
402 |
gen_start_time = time.time()
|
403 |
outputs = model.generate(
|
404 |
**inputs,
|
405 |
-
max_new_tokens=40,
|
406 |
min_length=20,
|
407 |
do_sample=False,
|
408 |
repetition_penalty=2.0,
|
|
|
|
|
1 |
import logging
|
2 |
import os
|
3 |
import time
|
|
|
169 |
"The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
|
170 |
),
|
171 |
"what was the s&p 500 return in 2022?": (
|
172 |
+
"The S&P 500 returned approximately -19.4% in 2022, impacted by high inflation and interest rate hikes."
|
173 |
),
|
174 |
"what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
|
175 |
"The S&P 500βs average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
|
|
|
195 |
),
|
196 |
"what is the average return rate of the s&p 500 in the past 10 years?": (
|
197 |
"The S&P 500βs average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
|
198 |
+
),
|
199 |
+
"what was the average annual return of the s&p 500 between 2020 and 2022?": (
|
200 |
+
"The S&P 500βs average annual return from 2020 to 2022 was approximately 9.0%, including dividends, with significant volatility due to the COVID-19 recovery and 2022 bear market."
|
201 |
)
|
202 |
}
|
203 |
|
|
|
246 |
"1. This uses the historical average return of 10β12% (1927β2025).\n"
|
247 |
"2. Future returns vary and are not guaranteed.\n\n"
|
248 |
"Example 3:\n"
|
249 |
+
"Q: What was the average annual return of the S&P 500 between 2020 and 2022?\n"
|
250 |
+
"A: The S&P 500βs average annual return from 2020 to 2022 was approximately 9.0%, including dividends.\n"
|
251 |
+
"1. This period includes the COVID-19 recovery (16.3% in 2020, 26.9% in 2021) and a bear market (-19.4% in 2022).\n"
|
252 |
"2. Dividends contribute significantly to total returns.\n\n"
|
253 |
"Q: "
|
254 |
)
|
255 |
prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
|
256 |
|
257 |
+
# Substring matching for cache with exact year matching
|
258 |
# Two four-digit years joined by "and", "to", a hyphen, an en dash or an em
# dash. The look-arounds stop the pattern from matching inside longer numbers.
_YEAR_RANGE_RE = re.compile(
    r'(?<!\d)(\d{4})\s*(?:and|to|-|\u2013|\u2014)\s*(\d{4})(?!\d)'
)


def get_closest_cache_key(message, cache_keys):
    """Return the cache key that best answers *message*, or ``None``.

    Lookup happens in two stages:

    1. If the message contains a year range (e.g. "2020 and 2022",
       "2020 to 2022", "2020-2022", "2020\u20132022"), the first cache key
       that contains the same start/end years is returned, whichever
       separator the key itself uses.
    2. Otherwise, or when no key carries that year range, fall back to
       ``difflib.get_close_matches`` with a similarity cutoff of 0.8.

    Args:
        message: Raw user query; it is lower-cased and stripped before matching.
        cache_keys: Iterable of cache key strings.

    Returns:
        The matching cache key, or ``None`` when nothing matches.
    """
    message = message.lower().strip()
    # Materialize once: the keys are scanned here and again by difflib.
    keys = list(cache_keys)

    year_match = _YEAR_RANGE_RE.search(message)
    if year_match:
        wanted = year_match.groups()
        # Compare parsed year pairs rather than literal substrings, so the
        # separator used in the key does not have to match the query's.
        for key in keys:
            if wanted in _YEAR_RANGE_RE.findall(key):
                return key

    # Fallback to fuzzy matching
    matches = difflib.get_close_matches(message, keys, n=1, cutoff=0.8)
    return matches[0] if matches else None
|
271 |
|
272 |
# Parse period from user input
|
273 |
def parse_period(query):
|
274 |
+
# Match specific year ranges (e.g., "between 2020 and 2022", "2020β2022")
|
275 |
match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|β)\s*(\d{4})', query, re.IGNORECASE)
|
276 |
if match:
|
277 |
start_year, end_year = map(int, match.groups())
|
|
|
413 |
gen_start_time = time.time()
|
414 |
outputs = model.generate(
|
415 |
**inputs,
|
416 |
+
max_new_tokens=40,
|
417 |
min_length=20,
|
418 |
do_sample=False,
|
419 |
repetition_penalty=2.0,
|