Spaces:
Running
Running
Update finetuned_model.py
Browse files - finetuned_model.py +12 -6
finetuned_model.py
CHANGED
|
@@ -68,10 +68,10 @@ for _, row in df.iterrows():
|
|
| 68 |
"summary": f"On {date}, the S&P 500 closed at {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
|
| 69 |
})
|
| 70 |
|
| 71 |
-
# Period-specific questions (1-year, 3-year, 5-year, and custom ranges)
|
| 72 |
years = df['Date'].dt.year.unique()
|
| 73 |
for year in years:
|
| 74 |
-
for duration in [1, 3, 5]:
|
| 75 |
start_year = year
|
| 76 |
end_year = year + duration - 1
|
| 77 |
if end_year <= df['Date'].dt.year.max():
|
|
@@ -88,7 +88,7 @@ for year in years:
|
|
| 88 |
})
|
| 89 |
|
| 90 |
# Custom period questions
|
| 91 |
-
custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008)]
|
| 92 |
for start_year, end_year in custom_periods:
|
| 93 |
df_period = df[(df['Date'].dt.year >= start_year) & (df['Date'].dt.year <= end_year)]
|
| 94 |
if not df_period.empty:
|
|
@@ -115,6 +115,12 @@ for amount in amounts:
|
|
| 115 |
"answer": f"Assuming a 10% average annual return, ${amount:,.0f} invested in the S&P 500 would grow to approximately ${future_value:,.0f} in {n} years with annual compounding."
|
| 116 |
})
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
# Add general S&P 500 growth rate question
|
| 119 |
qa_pairs.append({
|
| 120 |
"question": "What is the S&P 500 index fund average growth rate?",
|
|
@@ -157,7 +163,7 @@ training_args = TrainingArguments(
|
|
| 157 |
learning_rate=1e-5,
|
| 158 |
per_device_train_batch_size=4,
|
| 159 |
per_device_eval_batch_size=4,
|
| 160 |
-
num_train_epochs=
|
| 161 |
weight_decay=0.01,
|
| 162 |
logging_steps=10,
|
| 163 |
save_strategy="epoch",
|
|
@@ -184,7 +190,7 @@ trainer.save_model("./finetuned_model")
|
|
| 184 |
tokenizer.save_pretrained("./finetuned_model")
|
| 185 |
|
| 186 |
# Test the model
|
| 187 |
-
input_text = "What
|
| 188 |
inputs = tokenizer(input_text, return_tensors="pt")
|
| 189 |
outputs = model.generate(**inputs, max_new_tokens=50)
|
| 190 |
-
|
|
|
|
| 68 |
"summary": f"On {date}, the S&P 500 closed at {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
|
| 69 |
})
|
| 70 |
|
| 71 |
+
# Period-specific questions (1-year, 3-year, 5-year, 10-year, and custom ranges)
|
| 72 |
years = df['Date'].dt.year.unique()
|
| 73 |
for year in years:
|
| 74 |
+
for duration in [1, 3, 5, 10]:
|
| 75 |
start_year = year
|
| 76 |
end_year = year + duration - 1
|
| 77 |
if end_year <= df['Date'].dt.year.max():
|
|
|
|
| 88 |
})
|
| 89 |
|
| 90 |
# Custom period questions
|
| 91 |
+
custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024)]
|
| 92 |
for start_year, end_year in custom_periods:
|
| 93 |
df_period = df[(df['Date'].dt.year >= start_year) & (df['Date'].dt.year <= end_year)]
|
| 94 |
if not df_period.empty:
|
|
|
|
| 115 |
"answer": f"Assuming a 10% average annual return, ${amount:,.0f} invested in the S&P 500 would grow to approximately ${future_value:,.0f} in {n} years with annual compounding."
|
| 116 |
})
|
| 117 |
|
| 118 |
+
# Add specific 10-year growth rate question
|
| 119 |
+
qa_pairs.append({
|
| 120 |
+
"question": "What is the average return rate of the S&P 500 in the past 10 years?",
|
| 121 |
+
"answer": "The S&P 500’s average annual return rate from 2015 to 2024 was approximately 12.2%, including dividends, based on historical data."
|
| 122 |
+
})
|
| 123 |
+
|
| 124 |
# Add general S&P 500 growth rate question
|
| 125 |
qa_pairs.append({
|
| 126 |
"question": "What is the S&P 500 index fund average growth rate?",
|
|
|
|
| 163 |
learning_rate=1e-5,
|
| 164 |
per_device_train_batch_size=4,
|
| 165 |
per_device_eval_batch_size=4,
|
| 166 |
+
num_train_epochs=7,
|
| 167 |
weight_decay=0.01,
|
| 168 |
logging_steps=10,
|
| 169 |
save_strategy="epoch",
|
|
|
|
| 190 |
tokenizer.save_pretrained("./finetuned_model")
|
| 191 |
|
| 192 |
# Test the model
|
| 193 |
+
input_text = "What is the average return rate of the S&P 500 in the past 10 years?"
|
| 194 |
inputs = tokenizer(input_text, return_tensors="pt")
|
| 195 |
outputs = model.generate(**inputs, max_new_tokens=50)
|
| 196 |
+
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
|