AnilNiraula committed on
Commit
8b40e00
·
verified ·
1 Parent(s): e9cecd4

Update finetuned_model.py

Browse files
Files changed (1) hide show
  1. finetuned_model.py +10 -22
finetuned_model.py CHANGED
@@ -78,7 +78,7 @@ for _, row in df_yearly.iterrows():
78
  "summary": f"In {year}, the S&P 500 averaged {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
79
  })
80
 
81
- # Period-specific questions (1-year, 3-year, 5-year, 10-year, and recent ranges)
82
  years = df_yearly['Year'].unique()
83
  for year in years:
84
  for duration in [1, 3, 5, 10]:
@@ -97,8 +97,8 @@ for year in years:
97
  "answer": f"The S&P 500’s {duration}-year average annual inflation-adjusted return from {start_year} to {end_year} was approximately {avg_real_return:.1f}%."
98
  })
99
 
100
- # Custom period questions, including recent periods
101
- custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024), (2020, 2022), (2020, 2024)]
102
  for start_year, end_year in custom_periods:
103
  df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
104
  if not df_period.empty:
@@ -139,12 +139,8 @@ qa_pairs.append({
139
  "answer": "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data."
140
  })
141
  qa_pairs.append({
142
- "question": "What was the average annual return of the S&P 500 between 2020 and 2022?",
143
- "answer": "The S&P 500’s average annual return from 2020 to 2022 was approximately 8.3%, including dividends, with significant volatility due to the COVID-19 recovery and 2022 bear market."
144
- })
145
- qa_pairs.append({
146
- "question": "What was the average annual return of the S&P 500 in the past 5 years?",
147
- "answer": "The S&P 500’s average annual return from 2020 to 2024 was approximately 10.5%, including dividends, based on historical data."
148
  })
149
 
150
  # Save to JSON
@@ -181,8 +177,8 @@ training_args = TrainingArguments(
181
  output_dir="./finetuned_model",
182
  evaluation_strategy="epoch",
183
  learning_rate=1e-5,
184
- per_device_train_batch_size=16,
185
- per_device_eval_batch_size=16,
186
  num_train_epochs=7,
187
  weight_decay=0.01,
188
  logging_steps=10,
@@ -210,15 +206,7 @@ trainer.save_model("./finetuned_model")
210
  tokenizer.save_pretrained("./finetuned_model")
211
 
212
  # Test the model
213
- input_text = "What was the average annual return of the S&P 500 between 2020 and 2022?"
214
- inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True)
215
- inputs = {k: v.to("cpu") for k, v in inputs.items()}
216
- with torch.inference_mode():
217
- outputs = model.generate(
218
- **inputs,
219
- max_new_tokens=20,
220
- repetition_penalty=3.0,
221
- no_repeat_ngram_size=2,
222
- pad_token_id=tokenizer.eos_token_id
223
- )
224
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 
78
  "summary": f"In {year}, the S&P 500 averaged {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
79
  })
80
 
81
+ # Period-specific questions (1-year, 3-year, 5-year, 10-year, and custom ranges)
82
  years = df_yearly['Year'].unique()
83
  for year in years:
84
  for duration in [1, 3, 5, 10]:
 
97
  "answer": f"The S&P 500’s {duration}-year average annual inflation-adjusted return from {start_year} to {end_year} was approximately {avg_real_return:.1f}%."
98
  })
99
 
100
+ # Custom period questions
101
+ custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024)]
102
  for start_year, end_year in custom_periods:
103
  df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
104
  if not df_period.empty:
 
139
  "answer": "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data."
140
  })
141
  qa_pairs.append({
142
+ "question": "What was the average annual return of the S&P 500 between 2010 and 2020?",
143
+ "answer": "The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends, driven by post-financial crisis recovery."
 
 
 
 
144
  })
145
 
146
  # Save to JSON
 
177
  output_dir="./finetuned_model",
178
  evaluation_strategy="epoch",
179
  learning_rate=1e-5,
180
+ per_device_train_batch_size=8, # Reduced from 16
181
+ per_device_eval_batch_size=8,
182
  num_train_epochs=7,
183
  weight_decay=0.01,
184
  logging_steps=10,
 
206
  tokenizer.save_pretrained("./finetuned_model")
207
 
208
  # Test the model
209
+ input_text = "What was the average annual return of the S&P 500 between 2010 and 2020?"
210
+ inputs = tokenizer(input_text, return_tensors="pt")
211
+ outputs = model.generate(**inputs, max_new_tokens=40)
 
 
 
 
 
 
 
 
212
  print(tokenizer.decode(outputs[0], skip_special_tokens=True))