AnilNiraula committed on
Commit
e9cecd4
·
verified ·
1 Parent(s): cb4d0c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +200 -47
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import logging
2
  import os
3
  import time
@@ -9,7 +10,6 @@ import re
9
  import numpy as np
10
  import json
11
  import difflib
12
- from functools import lru_cache
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
@@ -46,14 +46,170 @@ if df is not None:
46
  else:
47
  df_yearly = None
48
 
49
- # Hardcoded fallback for recent periods if dataset is incomplete
50
- fallback_returns = {
51
- (2020, 2022): 8.3, # Average annual return based on external data
52
- (2015, 2024): 12.2,
53
- (2020, 2024): 10.5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  }
55
 
56
- # Load model and tokenizer at startup
 
 
 
 
 
 
 
 
 
 
57
  model_name = "./finetuned_model" if os.path.exists("./finetuned_model") else "distilgpt2"
58
  try:
59
  logger.info(f"Loading tokenizer for {model_name}")
@@ -61,14 +217,11 @@ try:
61
  tokenizer.pad_token = tokenizer.eos_token
62
  logger.info(f"Loading model {model_name}")
63
  with torch.inference_mode():
64
- if os.path.exists("./finetuned_model/distilgpt2_traced.pt"):
65
- model = torch.jit.load("./finetuned_model/distilgpt2_traced.pt")
66
- else:
67
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True).to(device)
68
- sample_input = tokenizer("What was the average annual return of the S&P 500 between 2020 and 2022?", return_tensors="pt", padding=True, truncation=True)
69
- sample_input = {k: v.to(device) for k, v in sample_input.items()}
70
- model = torch.jit.trace(model, (sample_input["input_ids"], sample_input["attention_mask"]))
71
- model.save("./finetuned_model/distilgpt2_traced.pt")
72
  logger.info(f"Successfully loaded model: {model_name}")
73
  except Exception as e:
74
  logger.error(f"Error loading model/tokenizer: {e}")
@@ -76,38 +229,38 @@ except Exception as e:
76
 
77
  # Pre-tokenize prompt prefix
78
  prompt_prefix = (
79
- "You are FinChat, a financial advisor with expertise in stock market performance. Provide concise, accurate answers with historical data for S&P 500 queries. "
80
- "For period-specific queries, use precise year ranges and calculate average annual returns. For investment return queries, use compound interest calculations "
81
- "based on historical averages. Avoid repetition and ensure answers are relevant.\n\n"
82
  "Example 1:\n"
83
  "Q: What is the S&P 500’s average annual return?\n"
84
- "A: The S&P 500’s average annual return is ~10–12% over the long term (1927–2025), including dividends.\n\n"
 
 
 
85
  "Example 2:\n"
86
  "Q: What will $5,000 be worth in 10 years if invested in the S&P 500?\n"
87
- "A: Assuming a 10% average annual return, a $5,000 investment in the S&P 500 would grow to approximately $12,974 in 10 years with annual compounding.\n\n"
 
 
88
  "Example 3:\n"
89
- "Q: What was the average annual return of the S&P 500 between 2020 and 2022?\n"
90
- "A: The S&P 500’s average annual return from 2020 to 2022 was approximately 8.3%, including dividends, with significant volatility due to the COVID-19 recovery and 2022 bear market.\n\n"
 
 
91
  "Q: "
92
  )
93
- prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512)["input_ids"].to(device)
94
 
95
- # Substring matching for cache with exact year matching
96
- @lru_cache(maxsize=100)
97
- def get_closest_cache_key(message):
98
  message = message.lower().strip()
99
- year_match = re.search(r'(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', message)
100
- if year_match:
101
- start_year, end_year = year_match.groups()
102
- for key in response_cache.keys():
103
- if f"{start_year} and {end_year}" in key or f"{start_year} to {end_year}" in key or f"{start_year}–{end_year}" in key:
104
- return key
105
- matches = difflib.get_close_matches(message, response_cache.keys(), n=1, cutoff=0.7)
106
  return matches[0] if matches else None
107
 
108
  # Parse period from user input
109
  def parse_period(query):
110
- # Match specific year ranges (e.g., "between 2020 and 2022", "2020–2022")
111
  match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
112
  if match:
113
  start_year, end_year = map(int, match.groups())
@@ -118,7 +271,7 @@ def parse_period(query):
118
  duration, start_year = map(int, match.groups())
119
  end_year = start_year + duration - 1
120
  return start_year, end_year, duration
121
- # Match general duration queries (e.g., "past 5 years", "3-year growth rate")
122
  match = re.search(r'past\s*(\d+)-year|\b(\d+)-year.*(?:return|growth\s*rate)', query, re.IGNORECASE)
123
  if match:
124
  duration = int(match.group(1) or match.group(2))
@@ -130,13 +283,6 @@ def parse_period(query):
130
 
131
  # Calculate average growth rate
132
  def calculate_growth_rate(start_year, end_year, duration=None):
133
- if (start_year, end_year) in fallback_returns:
134
- avg_return = fallback_returns[(start_year, end_year)]
135
- if duration:
136
- response = f"The S&P 500’s {duration}-year average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
137
- else:
138
- response = f"The S&P 500’s average annual return from {start_year} to {end_year} was approximately {avg_return:.1f}%, including dividends."
139
- return avg_return, response
140
  if df_yearly is None or start_year is None or end_year is None:
141
  return None, "Data not available or invalid period."
142
  df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
@@ -180,7 +326,8 @@ def chat_with_model(user_input, history=None, is_processing=False):
180
 
181
  # Normalize and check cache
182
  cache_key = user_input.lower().strip()
183
- closest_key = get_closest_cache_key(cache_key)
 
184
  if closest_key:
185
  logger.info(f"Cache hit for: {closest_key}")
186
  response = response_cache[closest_key]
@@ -237,8 +384,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
237
  # Construct prompt
238
  full_prompt = prompt_prefix + user_input + "\nA:"
239
  try:
240
- inputs = tokenizer(full_prompt, return_tensors="pt", padding=True, truncation=True)
241
- inputs = {k: v.to(device) for k, v in inputs.items()}
242
  except Exception as e:
243
  logger.error(f"Error tokenizing input: {e}")
244
  response = f"Error: Failed to process input: {str(e)}"
@@ -254,7 +400,14 @@ def chat_with_model(user_input, history=None, is_processing=False):
254
  with torch.inference_mode():
255
  logger.info("Generating response with model")
256
  gen_start_time = time.time()
257
- outputs = model(inputs["input_ids"], inputs["attention_mask"])
 
 
 
 
 
 
 
258
  gen_end_time = time.time()
259
  logger.info(f"Generation time: {gen_end_time - gen_start_time:.2f} seconds")
260
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
@@ -288,7 +441,7 @@ def chat_with_model(user_input, history=None, is_processing=False):
288
  # Save cache on exit
289
  def save_cache():
290
  try:
291
- with open("cache.json", "w") as f:
292
  json.dump(response_cache, f, indent=2)
293
  logger.info("Saved cache to cache.json")
294
  except Exception as e:
 
1
+ #Loading packages
2
  import logging
3
  import os
4
  import time
 
10
  import numpy as np
11
  import json
12
  import difflib
 
13
 
14
  # Set up logging
15
  logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
 
46
  else:
47
  df_yearly = None
48
 
49
+ # Response cache with financial data entries
50
+ response_cache = {
51
+ "hi": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
52
+ "hello": "Hello! I'm FinChat, your financial advisor. How can I help with investing?",
53
+ "hey": "Hi there! Ready to discuss investment goals with FinChat?",
54
+ "what is better individual stocks or etfs?": (
55
+ "Here’s a comparison of individual stocks vs. ETFs:\n"
56
+ "1. **Individual Stocks**: High returns possible (e.g., Apple up 80% in 2020) but riskier due to lack of diversification. Require active research.\n"
57
+ "2. **ETFs**: Diversify risk by tracking indices (e.g., SPY, S&P 500, ~12% avg. return 2015–2024). Lower fees and less research needed.\n"
58
+ "3. **Recommendation**: Beginners should start with ETFs; experienced investors may add stocks.\n"
59
+ "Consult a financial planner."
60
+ ),
61
+ "is $100 per month enough to invest?": (
62
+ "Yes, $100 per month is enough to start investing. Here’s why and how:\n"
63
+ "1. **Feasibility**: Brokerages like Fidelity have no minimums, and commission-free trading eliminates fees.\n"
64
+ "2. **Options**: Buy fractional shares of ETFs (e.g., SPY, ~$622/share in 2025) with $100.\n"
65
+ "3. **Strategy**: Use dollar-cost averaging to invest monthly, reducing market timing risks.\n"
66
+ "4. **Growth**: At 10% annual return, $100 monthly could grow to ~$41,000 in 20 years.\n"
67
+ "5. **Tips**: Ensure an emergency fund; diversify.\n"
68
+ "Consult a financial planner."
69
+ ),
70
+ "can i invest $100 a month?": (
71
+ "Yes, $100 a month is sufficient. Here’s how:\n"
72
+ "1. **Brokerage**: Open an account with Fidelity or Vanguard (no minimums).\n"
73
+ "2. **Investments**: Buy fractional shares of ETFs like SPY ($100 buys ~0.16 shares in 2025).\n"
74
+ "3. **Approach**: Use dollar-cost averaging for steady growth.\n"
75
+ "4. **Long-Term**: At 10% return, $100 monthly could reach ~$41,000 in 20 years.\n"
76
+ "5. **Tips**: Prioritize an emergency fund and diversify.\n"
77
+ "Consult a financial planner."
78
+ ),
79
+ "hi, give me step-by-step investing advice": (
80
+ "Here’s a step-by-step guide to start investing:\n"
81
+ "1. Open a brokerage account (e.g., Fidelity, Vanguard) if 18 or older.\n"
82
+ "2. Deposit an affordable amount, like $100, after an emergency fund.\n"
83
+ "3. Research and buy an ETF (e.g., SPY) using Yahoo Finance.\n"
84
+ "4. Monitor monthly and enable dividend reinvesting.\n"
85
+ "5. Use dollar-cost averaging ($100 monthly) to reduce risk.\n"
86
+ "6. Diversify across sectors.\n"
87
+ "Consult a financial planner."
88
+ ),
89
+ "hi, pretend you are a financial advisor. now tell me how can i start investing in stock market?": (
90
+ "Here’s a guide to start investing:\n"
91
+ "1. Learn from Investopedia or 'The Intelligent Investor.'\n"
92
+ "2. Set goals (e.g., retirement) and assess risk.\n"
93
+ "3. Choose a brokerage (Fidelity, Vanguard).\n"
94
+ "4. Start with ETFs (e.g., SPY) or mutual funds.\n"
95
+ "5. Use dollar-cost averaging ($100-$500 monthly).\n"
96
+ "6. Diversify and monitor.\n"
97
+ "Consult a financial planner."
98
+ ),
99
+ "do you have a list of companies you recommend?": (
100
+ "I can’t recommend specific companies without data. Try ETFs like SPY (S&P 500, ~12% avg. return 2015–2024) or QQQ (tech). "
101
+ "Research stocks like Apple (AAPL, ~80% return in 2020) or Johnson & Johnson on Yahoo Finance.\n"
102
+ "Consult a financial planner."
103
+ ),
104
+ "how do i start investing in stocks?": (
105
+ "Learn from Investopedia. Set goals and assess risk. Open a brokerage account (Fidelity, Vanguard) "
106
+ "and start with ETFs (e.g., SPY, ~12% avg. return 2015–2024). Consult a financial planner."
107
+ ),
108
+ "what's the difference between stocks and bonds?": (
109
+ "Stocks are company ownership with high risk and growth potential (e.g., S&P 500 ~12% avg. return 2015–2024). Bonds are loans to companies/governments "
110
+ "with lower risk and steady interest. Diversify for balance."
111
+ ),
112
+ "how much should i invest?": (
113
+ "Invest what you can afford after expenses and an emergency fund. Start with $100-$500 monthly "
114
+ "in ETFs like SPY (~12% avg. return 2015–2024). Consult a financial planner."
115
+ ),
116
+ "what is dollar-cost averaging?": (
117
+ "Dollar-cost averaging is investing a fixed amount regularly (e.g., $100 monthly) in ETFs, "
118
+ "reducing risk by spreading purchases over time."
119
+ ),
120
+ "give me few investing idea": (
121
+ "Here are investing ideas:\n"
122
+ "1. Open a brokerage account (e.g., Fidelity) if 18 or older.\n"
123
+ "2. Deposit $100 or what you can afford.\n"
124
+ "3. Buy a researched ETF (e.g., SPY, ~12% avg. return 2015–2024) or index fund.\n"
125
+ "4. Check regularly and enable dividend reinvesting.\n"
126
+ "5. Use dollar-cost averaging (e.g., monthly buys).\n"
127
+ "Consult a financial planner."
128
+ ),
129
+ "give me investing tips": (
130
+ "Here are investing tips:\n"
131
+ "1. Educate yourself with Investopedia or books.\n"
132
+ "2. Open a brokerage account (e.g., Vanguard).\n"
133
+ "3. Start small with ETFs like SPY (~12% avg. return 2015–2024).\n"
134
+ "4. Invest regularly using dollar-cost averaging.\n"
135
+ "5. Diversify to manage risk.\n"
136
+ "Consult a financial planner."
137
+ ),
138
+ "how to start investing": (
139
+ "Here’s how to start investing:\n"
140
+ "1. Study basics on Investopedia.\n"
141
+ "2. Open a brokerage account (e.g., Fidelity).\n"
142
+ "3. Deposit $100 or more after securing savings.\n"
143
+ "4. Buy an ETF like SPY (~12% avg. return 2015–2024) after research.\n"
144
+ "5. Invest monthly with dollar-cost averaging.\n"
145
+ "Consult a financial planner."
146
+ ),
147
+ "investing advice": (
148
+ "Here’s investing advice:\n"
149
+ "1. Learn basics from Investopedia.\n"
150
+ "2. Open a brokerage account (e.g., Vanguard).\n"
151
+ "3. Start with $100 in an ETF like SPY (~12% avg. return 2015–2024).\n"
152
+ "4. Use dollar-cost averaging for regular investments.\n"
153
+ "5. Monitor and diversify your portfolio.\n"
154
+ "Consult a financial planner."
155
+ ),
156
+ "steps to invest": (
157
+ "Here are steps to invest:\n"
158
+ "1. Educate yourself using Investopedia.\n"
159
+ "2. Open a brokerage account (e.g., Fidelity).\n"
160
+ "3. Deposit an initial $100 after savings.\n"
161
+ "4. Buy an ETF like SPY (~12% avg. return 2015–2024) after research.\n"
162
+ "5. Use dollar-cost averaging monthly.\n"
163
+ "Consult a financial planner."
164
+ ),
165
+ "what is the s&p 500 index fund average growth rate?": (
166
+ "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data. "
167
+ "For example, from 2015 to 2024, it averaged ~12.2% annually. Returns vary yearly due to market conditions. Consult a financial planner."
168
+ ),
169
+ "what was the s&p 500 return in 2020?": (
170
+ "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
171
+ ),
172
+ "what was the s&p 500 return in 2022?": (
173
+ "The S&P 500 returned approximately -18.1% in 2022, impacted by high inflation and interest rate hikes."
174
+ ),
175
+ "what is the average annual growth rate of the s&p 500 from 2000 to 2010?": (
176
+ "The S&P 500’s average annual growth rate from 2000 to 2010 was approximately 0.4%, including dividends, impacted by the dot-com crash and 2008 financial crisis."
177
+ ),
178
+ "what is the average annual growth rate of the s&p 500 from 2011 to 2016?": (
179
+ "The S&P 500’s average annual growth rate from 2011 to 2016 was approximately 12.7%, including dividends, driven by post-financial crisis recovery."
180
+ ),
181
+ "what was the average annual return of the s&p 500 between 2010 and 2020?": (
182
+ "The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends, driven by post-financial crisis recovery."
183
+ ),
184
+ "what will my return be in 10 years if i invest $5000 into s&p 500 right now?": (
185
+ "Assuming a 10% average annual return, a $5,000 investment in the S&P 500 would grow to approximately $12,974 in 10 years with annual compounding. "
186
+ "This is based on the historical average return of 10–12% (1927–2025). Future returns vary and are not guaranteed. Consult a financial planner."
187
+ ),
188
+ "what was the 1-year average annual growth rate of the s&p 500 from 2020?": (
189
+ "The S&P 500 returned approximately 16.3% in 2020, including dividends, driven by recovery from the COVID-19 market crash."
190
+ ),
191
+ "what was the 3-year average annual growth rate of the s&p 500 from 2018?": (
192
+ "The S&P 500’s average annual growth rate from 2018 to 2020 was approximately 10.2%, including dividends, based on historical data."
193
+ ),
194
+ "what was the 5-year average annual growth rate of the s&p 500 from 2016?": (
195
+ "The S&P 500’s average annual growth rate from 2016 to 2020 was approximately 13.6%, including dividends, driven by strong market recovery."
196
+ ),
197
+ "what is the average return rate of the s&p 500 in the past 10 years?": (
198
+ "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
199
+ )
200
  }
201
 
202
+ # Load persistent cache
203
+ cache_file = "cache.json"
204
+ try:
205
+ if os.path.exists(cache_file):
206
+ with open(cache_file, 'r') as f:
207
+ response_cache.update(json.load(f))
208
+ logger.info("Loaded persistent cache from cache.json")
209
+ except Exception as e:
210
+ logger.warning(f"Failed to load cache.json: {e}")
211
+
212
+ # Load model and tokenizer (use fine-tuned model if available)
213
  model_name = "./finetuned_model" if os.path.exists("./finetuned_model") else "distilgpt2"
214
  try:
215
  logger.info(f"Loading tokenizer for {model_name}")
 
217
  tokenizer.pad_token = tokenizer.eos_token
218
  logger.info(f"Loading model {model_name}")
219
  with torch.inference_mode():
220
+ model = AutoModelForCausalLM.from_pretrained(
221
+ model_name,
222
+ torch_dtype=torch.float16,
223
+ low_cpu_mem_usage=True
224
+ ).to(device)
 
 
 
225
  logger.info(f"Successfully loaded model: {model_name}")
226
  except Exception as e:
227
  logger.error(f"Error loading model/tokenizer: {e}")
 
229
 
230
  # Pre-tokenize prompt prefix
231
  prompt_prefix = (
232
+ "You are FinChat, a financial advisor with expertise in stock market performance. Provide detailed, numbered list advice with clear reasoning for investing prompts, "
233
+ "including precise historical data when relevant (e.g., S&P 500 returns for specific years or periods). For investment return queries, use compound interest calculations "
234
+ "based on historical averages. Avoid repetition and incomplete answers. Explain why each step or choice is beneficial.\n\n"
235
  "Example 1:\n"
236
  "Q: What is the S&P 500’s average annual return?\n"
237
+ "A: The S&P 500’s average annual return is ~10–12% over the long term (1927–2025), including dividends.\n"
238
+ "1. This reflects historical data adjusted for inflation and dividends.\n"
239
+ "2. Returns vary yearly (e.g., 16.3% in 2020) due to market conditions.\n"
240
+ "3. ETFs like SPY track this index for broad market exposure.\n\n"
241
  "Example 2:\n"
242
  "Q: What will $5,000 be worth in 10 years if invested in the S&P 500?\n"
243
+ "A: Assuming a 10% average annual return, a $5,000 investment in the S&P 500 would grow to approximately $12,974 in 10 years with annual compounding.\n"
244
+ "1. This uses the historical average return of 10–12% (1927–2025).\n"
245
+ "2. Future returns vary and are not guaranteed.\n\n"
246
  "Example 3:\n"
247
+ "Q: What was the average annual return of the S&P 500 between 2010 and 2020?\n"
248
+ "A: The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends.\n"
249
+ "1. This period includes strong recovery post-financial crisis.\n"
250
+ "2. Dividends contribute significantly to total returns.\n\n"
251
  "Q: "
252
  )
253
+ prefix_tokens = tokenizer(prompt_prefix, return_tensors="pt", truncation=True, max_length=512).to(device)
254
 
255
+ # Substring matching for cache with fuzzy matching
256
+ def get_closest_cache_key(message, cache_keys):
 
257
  message = message.lower().strip()
258
+ matches = difflib.get_close_matches(message, cache_keys, n=1, cutoff=0.8)
 
 
 
 
 
 
259
  return matches[0] if matches else None
260
 
261
  # Parse period from user input
262
  def parse_period(query):
263
+ # Match specific year ranges (e.g., "between 2010 and 2020", "2000–2008")
264
  match = re.search(r'(?:between|from)\s*(\d{4})\s*(?:and|to|-|–)\s*(\d{4})', query, re.IGNORECASE)
265
  if match:
266
  start_year, end_year = map(int, match.groups())
 
271
  duration, start_year = map(int, match.groups())
272
  end_year = start_year + duration - 1
273
  return start_year, end_year, duration
274
+ # Match general duration queries (e.g., "past 10 years", "3-year growth rate")
275
  match = re.search(r'past\s*(\d+)-year|\b(\d+)-year.*(?:return|growth\s*rate)', query, re.IGNORECASE)
276
  if match:
277
  duration = int(match.group(1) or match.group(2))
 
283
 
284
  # Calculate average growth rate
285
  def calculate_growth_rate(start_year, end_year, duration=None):
 
 
 
 
 
 
 
286
  if df_yearly is None or start_year is None or end_year is None:
287
  return None, "Data not available or invalid period."
288
  df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
 
326
 
327
  # Normalize and check cache
328
  cache_key = user_input.lower().strip()
329
+ cache_keys = list(response_cache.keys())
330
+ closest_key = cache_key if cache_key in response_cache else get_closest_cache_key(cache_key, cache_keys)
331
  if closest_key:
332
  logger.info(f"Cache hit for: {closest_key}")
333
  response = response_cache[closest_key]
 
384
  # Construct prompt
385
  full_prompt = prompt_prefix + user_input + "\nA:"
386
  try:
387
+ inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
 
388
  except Exception as e:
389
  logger.error(f"Error tokenizing input: {e}")
390
  response = f"Error: Failed to process input: {str(e)}"
 
400
  with torch.inference_mode():
401
  logger.info("Generating response with model")
402
  gen_start_time = time.time()
403
+ outputs = model.generate(
404
+ **inputs,
405
+ max_new_tokens=40, # Reduced for faster inference
406
+ min_length=20,
407
+ do_sample=False,
408
+ repetition_penalty=2.0,
409
+ pad_token_id=tokenizer.eos_token_id
410
+ )
411
  gen_end_time = time.time()
412
  logger.info(f"Generation time: {gen_end_time - gen_start_time:.2f} seconds")
413
  response = tokenizer.decode(outputs[0], skip_special_tokens=True)
 
441
  # Save cache on exit
442
  def save_cache():
443
  try:
444
+ with open(cache_file, 'w') as f:
445
  json.dump(response_cache, f, indent=2)
446
  logger.info("Saved cache to cache.json")
447
  except Exception as e: