Spaces:
Running
Running
Update finetuned_model.py
Browse files - finetuned_model.py (+10 -22)
finetuned_model.py
CHANGED
@@ -78,7 +78,7 @@ for _, row in df_yearly.iterrows():
|
|
78 |
"summary": f"In {year}, the S&P 500 averaged {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
|
79 |
})
|
80 |
|
81 |
-
# Period-specific questions (1-year, 3-year, 5-year, 10-year, and
|
82 |
years = df_yearly['Year'].unique()
|
83 |
for year in years:
|
84 |
for duration in [1, 3, 5, 10]:
|
@@ -97,8 +97,8 @@ for year in years:
|
|
97 |
"answer": f"The S&P 500’s {duration}-year average annual inflation-adjusted return from {start_year} to {end_year} was approximately {avg_real_return:.1f}%."
|
98 |
})
|
99 |
|
100 |
-
# Custom period questions
|
101 |
-
custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024)
|
102 |
for start_year, end_year in custom_periods:
|
103 |
df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
|
104 |
if not df_period.empty:
|
@@ -139,12 +139,8 @@ qa_pairs.append({
|
|
139 |
"answer": "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data."
|
140 |
})
|
141 |
qa_pairs.append({
|
142 |
-
"question": "What was the average annual return of the S&P 500 between
|
143 |
-
"answer": "The S&P 500’s average annual return from
|
144 |
-
})
|
145 |
-
qa_pairs.append({
|
146 |
-
"question": "What was the average annual return of the S&P 500 in the past 5 years?",
|
147 |
-
"answer": "The S&P 500’s average annual return from 2020 to 2024 was approximately 10.5%, including dividends, based on historical data."
|
148 |
})
|
149 |
|
150 |
# Save to JSON
|
@@ -181,8 +177,8 @@ training_args = TrainingArguments(
|
|
181 |
output_dir="./finetuned_model",
|
182 |
evaluation_strategy="epoch",
|
183 |
learning_rate=1e-5,
|
184 |
-
per_device_train_batch_size=
|
185 |
-
per_device_eval_batch_size=
|
186 |
num_train_epochs=7,
|
187 |
weight_decay=0.01,
|
188 |
logging_steps=10,
|
@@ -210,15 +206,7 @@ trainer.save_model("./finetuned_model")
|
|
210 |
tokenizer.save_pretrained("./finetuned_model")
|
211 |
|
212 |
# Test the model
|
213 |
-
input_text = "What was the average annual return of the S&P 500 between
|
214 |
-
inputs = tokenizer(input_text, return_tensors="pt"
|
215 |
-
|
216 |
-
with torch.inference_mode():
|
217 |
-
outputs = model.generate(
|
218 |
-
**inputs,
|
219 |
-
max_new_tokens=20,
|
220 |
-
repetition_penalty=3.0,
|
221 |
-
no_repeat_ngram_size=2,
|
222 |
-
pad_token_id=tokenizer.eos_token_id
|
223 |
-
)
|
224 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|
|
|
78 |
"summary": f"In {year}, the S&P 500 averaged {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
|
79 |
})
|
80 |
|
81 |
+
# Period-specific questions (1-year, 3-year, 5-year, 10-year, and custom ranges)
|
82 |
years = df_yearly['Year'].unique()
|
83 |
for year in years:
|
84 |
for duration in [1, 3, 5, 10]:
|
|
|
97 |
"answer": f"The S&P 500’s {duration}-year average annual inflation-adjusted return from {start_year} to {end_year} was approximately {avg_real_return:.1f}%."
|
98 |
})
|
99 |
|
100 |
+
# Custom period questions
|
101 |
+
custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024)]
|
102 |
for start_year, end_year in custom_periods:
|
103 |
df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
|
104 |
if not df_period.empty:
|
|
|
139 |
"answer": "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data."
|
140 |
})
|
141 |
qa_pairs.append({
|
142 |
+
"question": "What was the average annual return of the S&P 500 between 2010 and 2020?",
|
143 |
+
"answer": "The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends, driven by post-financial crisis recovery."
|
|
|
|
|
|
|
|
|
144 |
})
|
145 |
|
146 |
# Save to JSON
|
|
|
177 |
output_dir="./finetuned_model",
|
178 |
evaluation_strategy="epoch",
|
179 |
learning_rate=1e-5,
|
180 |
+
per_device_train_batch_size=8, # Increased for faster training
|
181 |
+
per_device_eval_batch_size=8,
|
182 |
num_train_epochs=7,
|
183 |
weight_decay=0.01,
|
184 |
logging_steps=10,
|
|
|
206 |
tokenizer.save_pretrained("./finetuned_model")
|
207 |
|
208 |
# Test the model
|
209 |
+
input_text = "What was the average annual return of the S&P 500 between 2010 and 2020?"
|
210 |
+
inputs = tokenizer(input_text, return_tensors="pt")
|
211 |
+
outputs = model.generate(**inputs, max_new_tokens=40)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|