AnilNiraula committed on
Commit
0aaffc4
·
verified ·
1 Parent(s): 8782909

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -56
app.py CHANGED
@@ -1,4 +1,3 @@
1
- #Loading packages
2
  import logging
3
  import os
4
  import time
@@ -15,11 +14,11 @@ import difflib
15
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
16
  logger = logging.getLogger(__name__)
17
 
18
- # Define device (force CPU for Spaces free tier)
19
  device = torch.device("cpu")
20
  logger.info(f"Using device: {device}")
21
 
22
- # Load dataset and precompute period data
23
  csv_path = "flat-ui__data-Sun Jul 06 2025.csv"
24
  try:
25
  df = pd.read_csv(csv_path)
@@ -32,7 +31,7 @@ except Exception as e:
32
  logger.error(f"Error loading dataset: {e}")
33
  df = None
34
 
35
- # Precompute yearly aggregates for faster lookups
36
  if df is not None:
37
  df_yearly = df.groupby(df['Date'].dt.year).agg({
38
  'SP500': 'mean',
@@ -46,7 +45,7 @@ if df is not None:
46
  else:
47
  df_yearly = None
48
 
49
- # Response cache with financial data entries
50
  response_cache = {
51
  "hi": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
52
  "hello": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
@@ -164,38 +163,7 @@ response_cache = {
164
  ),
165
  "what is the s&p 500 index fund average growth rate?": (
166
  "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data. "
167
- "For example, from 2015 to 2024, it averaged ~12.2% annually. Returns vary yearly due to market conditions. Consult a financial planner."
168
- ),
169
- "what was the s&p 500 return in 2020?": (
170
- "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
171
- ),
172
- "what was the s&p 500 return in 2022?": (
173
- "The S&P 500 returned approximately -18.1% in 2022, impacted by high inflation and interest rate hikes."
174
- ),
175
- "what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
176
- "The S&P 500’s average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
177
- ),
178
- "what is the average annual growth rate of the s&p 500 from 2011 to 2016?": (
179
- "The S&P 500’s average annual growth rate from 2011 to 2016 was approximately 12.7%, including dividends, driven by post-financial crisis recovery."
180
- ),
181
- "what was the average annual return of the s&p 500 between 2010 and 2020?": (
182
- "The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends, driven by post-financial crisis recovery."
183
- ),
184
- "what will my return be in 10 years if i invest $5000 into s&p 500 right now?": (
185
- "Assuming a 10% average annual return, a $5,000 investment in the S&P 500 would grow to approximately $12,974 in 10 years with annual compounding. "
186
- "This is based on the historical average return of 10–12% (1927–2025). Future returns vary and are not guaranteed. Consult a financial planner."
187
- ),
188
- "what was the 1-year average annual growth rate of the s&p 500 from 2020?": (
189
- "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
190
- ),
191
- "what was the 3-year average annual growth rate of the s&p 500 from 2018?": (
192
- "The S&P 500’s average annual growth rate from 2018 to 2020 was approximately 10.2%, including dividends, based on historical data."
193
- ),
194
- "what was the 5-year average annual growth rate of the s&p 500 from 2016?": (
195
- "The S&P 500’s average annual growth rate from 2016 to 2020 was approximately 13.6%, including dividends, driven by strong market recovery."
196
- ),
197
- "what is the average return rate of the s&p 500 in the past 10 years?": (
198
- "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
199
  )
200
  }
201
 
@@ -209,7 +177,7 @@ try:
209
  except Exception as e:
210
  logger.warning(f"Failed to load cache.json: {e}")
211
 
212
- # Load model and tokenizer (use fine-tuned model if available)
213
  model_name = "./finetuned_model" if os.path.exists("./finetuned_model") else "distilgpt2"
214
  try:
215
  logger.info(f"Loading tokenizer for {model_name}")
@@ -227,7 +195,7 @@ except Exception as e:
227
  logger.error(f"Error loading model/tokenizer: {e}")
228
  raise RuntimeError(f"Failed to load model: {str(e)}")
229
 
230
- # Pre-tokenize prompt prefix
231
  prompt_prefix = (
232
  "You are FinChat, a financial advisor with expertise in stock market performance. Provide detailed, numbered list advice with clear reasoning for investing prompts, "
233
  "including precise historical data when relevant (e.g., S&P 500 returns for specific years or periods). For investment return queries, use compound interest calculations "
@@ -252,7 +220,7 @@ prompt_prefix = (
252
  )
253
  prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
254
 
255
- # Substring matching for cache with fuzzy matching
256
  def get_closest_cache_key(message, cache_keys):
257
  message = message.lower().strip()
258
  matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=0.8)
@@ -260,25 +228,32 @@ def get_closest_cache_key(message, cache_keys):
260
 
261
  # Parse period from user input
262
  def parse_period(query):
 
263
  # Match specific year ranges (e.g., "between 2010 and 2020", "2000–2008")
264
- match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
265
  if match:
266
  start_year, end_year = map(int, match.groups())
267
- return start_year, end_year, None
268
- # Match duration-based queries (e.g., "1-year from 2020", "3-year growth rate")
269
- match = re.search(r'(\d+)-year.*from\s*(\d{4})', query, re.IGNORECASE)
 
270
  if match:
271
  duration, start_year = map(int, match.groups())
272
  end_year = start_year + duration - 1
273
  return start_year, end_year, duration
274
- # Match general duration queries (e.g., "past 10 years", "3-year growth rate")
275
- match = re.search(r'past\s*(\d+)-year|\b(\d+)-year.*(?:return|growth\s*rate)', query, re.IGNORECASE)
276
  if match:
277
  duration = int(match.group(1) or match.group(2))
278
  max_year = df_yearly['Year'].max() if df_yearly is not None else 2025
279
  start_year = max_year - duration + 1
280
  end_year = max_year
281
  return start_year, end_year, duration
 
 
 
 
 
282
  return None, None, None
283
 
284
  # Calculate average growth rate
@@ -289,7 +264,11 @@ def calculate_growth_rate(start_year, end_year, duration=None):
289
  if df_period.empty:
290
  return None, f"No data available for {start_year} to {end_year}."
291
  avg_return = df_period['Return'].mean()
292
- if duration:
 
 
 
 
293
  response = f"The S&P 500’s {duration}-year average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
294
  else:
295
  response = f"The S&P 500’s average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
@@ -308,7 +287,7 @@ def parse_investment_query(query):
308
  def calculate_future_value(amount, years):
309
  if df_yearly is None or amount is None or years is None:
310
  return None, "Data not available or invalid input."
311
- avg_annual_return = 10.0 # Historical S&P 500 average (1927–2025)
312
  future_value = amount * (1 + avg_annual_return / 100) ** years
313
  return future_value, (
314
  f"Assuming a 10% average annual return, a ${amount:,.0f} investment in the S&P 500 would grow to approximately ${future_value:,.0f} "
@@ -316,7 +295,7 @@ def calculate_future_value(amount, years):
316
  "Future returns vary and are not guaranteed. Consult a financial planner."
317
  )
318
 
319
- # Define chat function
320
  def chat_with_model(user_input, history=None, is_processing=False):
321
  try:
322
  start_time = time.time()
@@ -369,7 +348,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
369
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
370
  return response, history, False, ""
371
 
372
- # Skip model for short prompts
373
  if len(user_input.strip()) <= 5:
374
  logger.info("Short prompt, returning default response")
375
  response = "Hello! I'm FinChat, your financial advisor. Ask about investing!"
@@ -381,7 +360,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
381
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
382
  return response, history, False, ""
383
 
384
- # Construct prompt
385
  full_prompt = prompt_prefix + user_input + "\nA:"
386
  try:
387
  inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
@@ -396,13 +375,12 @@ def chat_with_model(user_input, history=None, is_processing=False):
396
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
397
  return response, history, False, ""
398
 
399
- # Generate response
400
  with torch.inference_mode():
401
  logger.info("Generating response with model")
402
  gen_start_time = time.time()
403
  outputs = model.generate(
404
  **inputs,
405
- max_new_tokens=40, # Reduced for faster inference
406
  min_length=20,
407
  do_sample=False,
408
  repetition_penalty=2.0,
@@ -438,7 +416,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
438
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
439
  return response, history, False, ""
440
 
441
- # Save cache on exit
442
  def save_cache():
443
  try:
444
  with open(cache_file, 'w') as f:
@@ -447,7 +425,7 @@ def save_cache():
447
  except Exception as e:
448
  logger.warning(f"Failed to save cache.json: {e}")
449
 
450
- # Create Gradio interface with loading animation
451
  logger.info("Initializing Gradio interface")
452
  try:
453
  with gr.Blocks(
@@ -503,7 +481,7 @@ except Exception as e:
503
  logger.error(f"Error initializing Gradio interface: {e}")
504
  raise
505
 
506
- # Launch interface (conditional for Spaces)
507
  if __name__ == "__main__" and not os.getenv("HF_SPACE"):
508
  logger.info("Launching Gradio interface locally")
509
  try:
 
 
1
  import logging
2
  import os
3
  import time
 
14
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
15
  logger = logging.getLogger(__name__)
16
 
17
+ # Define device
18
  device = torch.device("cpu")
19
  logger.info(f"Using device: {device}")
20
 
21
+ # Load dataset
22
  csv_path = "flat-ui__data-Sun Jul 06 2025.csv"
23
  try:
24
  df = pd.read_csv(csv_path)
 
31
  logger.error(f"Error loading dataset: {e}")
32
  df = None
33
 
34
+ # Precompute yearly aggregates
35
  if df is not None:
36
  df_yearly = df.groupby(df['Date'].dt.year).agg({
37
  'SP500': 'mean',
 
45
  else:
46
  df_yearly = None
47
 
48
+ # Response cache
49
  response_cache = {
50
  "hi": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
51
  "hello": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
 
163
  ),
164
  "what is the s&p 500 index fund average growth rate?": (
165
  "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data. "
166
+ "Returns vary yearly due to market conditions. Consult a financial planner."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  )
168
  }
169
 
 
177
  except Exception as e:
178
  logger.warning(f"Failed to load cache.json: {e}")
179
 
180
+ # Load model and tokenizer
181
  model_name = "./finetuned_model" if os.path.exists("./finetuned_model") else "distilgpt2"
182
  try:
183
  logger.info(f"Loading tokenizer for {model_name}")
 
195
  logger.error(f"Error loading model/tokenizer: {e}")
196
  raise RuntimeError(f"Failed to load model: {str(e)}")
197
 
198
+ # Prompt prefix
199
  prompt_prefix = (
200
  "You are FinChat, a financial advisor with expertise in stock market performance. Provide detailed, numbered list advice with clear reasoning for investing prompts, "
201
  "including precise historical data when relevant (e.g., S&P 500 returns for specific years or periods). For investment return queries, use compound interest calculations "
 
220
  )
221
  prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
222
 
223
+ # Fuzzy (closest-match) lookup of cache keys
224
  def get_closest_cache_key(message, cache_keys):
225
  message = message.lower().strip()
226
  matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=0.8)
 
228
 
229
  # Parse period from user input
230
  def parse_period(query):
231
+ query = query.lower()
232
  # Match specific year ranges (e.g., "between 2010 and 2020", "2000–2008")
233
+ match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query)
234
  if match:
235
  start_year, end_year = map(int, match.groups())
236
+ if start_year <= end_year:
237
+ return start_year, end_year, None
238
+ # Match duration-based queries (e.g., "1-year from 2020", "3-year growth rate from 2018")
239
+ match = re.search(r'(\d+)-year.*from\s*(\d{4})', query)
240
  if match:
241
  duration, start_year = map(int, match.groups())
242
  end_year = start_year + duration - 1
243
  return start_year, end_year, duration
244
+ # Match general duration queries (e.g., "past 5 years", "10-year growth rate")
245
+ match = re.search(r'(?:past\s*(\d+)-year|\b(\d+)-year.*(?:return|growth\s*rate))', query)
246
  if match:
247
  duration = int(match.group(1) or match.group(2))
248
  max_year = df_yearly['Year'].max() if df_yearly is not None else 2025
249
  start_year = max_year - duration + 1
250
  end_year = max_year
251
  return start_year, end_year, duration
252
+ # Match single year (e.g., "return in 2020")
253
+ match = re.search(r'return\s*(?:in|for)\s*(\d{4})', query)
254
+ if match:
255
+ year = int(match.group(1))
256
+ return year, year, 1
257
  return None, None, None
258
 
259
  # Calculate average growth rate
 
264
  if df_period.empty:
265
  return None, f"No data available for {start_year} to {end_year}."
266
  avg_return = df_period['Return'].mean()
267
+ if np.isnan(avg_return):
268
+ return None, f"Insufficient data for {start_year} to {end_year}."
269
+ if duration == 1 and start_year == end_year:
270
+ response = f"The S&P 500 returned approximately {avg_return:.1f}% in {start_year}, including dividends."
271
+ elif duration:
272
  response = f"The S&P 500’s {duration}-year average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
273
  else:
274
  response = f"The S&P 500’s average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
 
287
  def calculate_future_value(amount, years):
288
  if df_yearly is None or amount is None or years is None:
289
  return None, "Data not available or invalid input."
290
+ avg_annual_return = 10.0
291
  future_value = amount * (1 + avg_annual_return / 100) ** years
292
  return future_value, (
293
  f"Assuming a 10% average annual return, a ${amount:,.0f} investment in the S&P 500 would grow to approximately ${future_value:,.0f} "
 
295
  "Future returns vary and are not guaranteed. Consult a financial planner."
296
  )
297
 
298
+ # Chat function
299
  def chat_with_model(user_input, history=None, is_processing=False):
300
  try:
301
  start_time = time.time()
 
348
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
349
  return response, history, False, ""
350
 
351
+ # Handle short prompts
352
  if len(user_input.strip()) <= 5:
353
  logger.info("Short prompt, returning default response")
354
  response = "Hello! I'm FinChat, your financial advisor. Ask about investing!"
 
360
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
361
  return response, history, False, ""
362
 
363
+ # Construct and generate response
364
  full_prompt = prompt_prefix + user_input + "\nA:"
365
  try:
366
  inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
 
375
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
376
  return response, history, False, ""
377
 
 
378
  with torch.inference_mode():
379
  logger.info("Generating response with model")
380
  gen_start_time = time.time()
381
  outputs = model.generate(
382
  **inputs,
383
+ max_new_tokens=40,
384
  min_length=20,
385
  do_sample=False,
386
  repetition_penalty=2.0,
 
416
  logger.info(f"Response time: {end_time - start_time:.2f} seconds")
417
  return response, history, False, ""
418
 
419
+ # Save cache
420
  def save_cache():
421
  try:
422
  with open(cache_file, 'w') as f:
 
425
  except Exception as e:
426
  logger.warning(f"Failed to save cache.json: {e}")
427
 
428
+ # Gradio interface
429
  logger.info("Initializing Gradio interface")
430
  try:
431
  with gr.Blocks(
 
481
  logger.error(f"Error initializing Gradio interface: {e}")
482
  raise
483
 
484
+ # Launch interface
485
  if __name__ == "__main__" and not os.getenv("HF_SPACE"):
486
  logger.info("Launching Gradio interface locally")
487
  try: