Spaces:

AnilNiraula
/

FinChat

Running

App Files Community

AnilNiraula committed on Jul 8

Commit

ff9da77

verified ·

1 Parent(s): 127973d

Update finetuned_model.py

Browse files

Files changed (1) hide show

finetuned_model.py +7 -7

finetuned_model.py CHANGED Viewed

@@ -78,7 +78,7 @@ for _, row in df_yearly.iterrows():
         "summary": f"In {year}, the S&P 500 averaged {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
     })
-# Period-specific questions (1-year, 3-year, 5-year, 10-year, and custom ranges)
 years = df_yearly['Year'].unique()
 for year in years:
     for duration in [1, 3, 5, 10]:
@@ -97,8 +97,8 @@ for year in years:
                 "answer": f"The S&P 500’s {duration}-year average annual inflation-adjusted return from {start_year} to {end_year} was approximately {avg_real_return:.1f}%."
             })
-# Custom period questions
-custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024)]
 for start_year, end_year in custom_periods:
     df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
     if not df_period.empty:
@@ -139,8 +139,8 @@ qa_pairs.append({
     "answer": "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data."
 })
 qa_pairs.append({
-    "question": "What was the average annual return of the S&P 500 between 2010 and 2020?",
-    "answer": "The S&P 500’s average annual return from 2010 to 2020 was approximately 13.6%, including dividends, driven by post-financial crisis recovery."
 })
 # Save to JSON
@@ -177,7 +177,7 @@ training_args = TrainingArguments(
     output_dir="./finetuned_model",
     evaluation_strategy="epoch",
     learning_rate=1e-5,
-    per_device_train_batch_size=8,  # Increased for faster training
     per_device_eval_batch_size=8,
     num_train_epochs=7,
     weight_decay=0.01,
@@ -206,7 +206,7 @@ trainer.save_model("./finetuned_model")
 tokenizer.save_pretrained("./finetuned_model")
 # Test the model
-input_text = "What was the average annual return of the S&P 500 between 2010 and 2020?"
 inputs = tokenizer(input_text, return_tensors="pt")
 outputs = model.generate(**inputs, max_new_tokens=40)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))

         "summary": f"In {year}, the S&P 500 averaged {sp500:.2f} with a {return_val:.1f}% annual return and a {real_return:.1f}% real return."
     })
+# Period-specific questions (1-year, 3-year, 5-year, 10-year, and recent ranges)
 years = df_yearly['Year'].unique()
 for year in years:
     for duration in [1, 3, 5, 10]:
                 "answer": f"The S&P 500’s {duration}-year average annual inflation-adjusted return from {start_year} to {end_year} was approximately {avg_real_return:.1f}%."
             })
+# Custom period questions, including recent periods
+custom_periods = [(2000, 2010), (2011, 2016), (2010, 2020), (2000, 2008), (2015, 2024), (2020, 2022)]
 for start_year, end_year in custom_periods:
     df_period = df_yearly[(df_yearly['Year'] >= start_year) & (df_yearly['Year'] <= end_year)]
     if not df_period.empty:
     "answer": "The S&P 500 index fund’s average annual return is approximately 10–12% over the long term (1927–2025), including dividends, based on historical data."
 })
 qa_pairs.append({
+    "question": "What was the average annual return of the S&P 500 between 2020 and 2022?",
+    "answer": "The S&P 500’s average annual return from 2020 to 2022 was approximately 9.0%, including dividends, with significant volatility due to the COVID-19 recovery and 2022 bear market."
 })
 # Save to JSON
     output_dir="./finetuned_model",
     evaluation_strategy="epoch",
     learning_rate=1e-5,
+    per_device_train_batch_size=8,
     per_device_eval_batch_size=8,
     num_train_epochs=7,
     weight_decay=0.01,
 tokenizer.save_pretrained("./finetuned_model")
 # Test the model
+input_text = "What was the average annual return of the S&P 500 between 2020 and 2022?"
 inputs = tokenizer(input_text, return_tensors="pt")
 outputs = model.generate(**inputs, max_new_tokens=40)
 print(tokenizer.decode(outputs[0], skip_special_tokens=True))