Ahmedik95316 committed on
Commit
075b701
·
verified ·
1 Parent(s): 8df371b

Update initialize_system.py

Browse files
Files changed (1) hide show
  1. initialize_system.py +23 -5
initialize_system.py CHANGED
@@ -7,13 +7,16 @@ import pandas as pd
7
  from pathlib import Path
8
  from datetime import datetime
9
  from sklearn.pipeline import Pipeline
 
10
  from sklearn.model_selection import cross_validate
11
  from sklearn.linear_model import LogisticRegression
 
12
  from sklearn.model_selection import train_test_split
13
  from sklearn.metrics import accuracy_score, f1_score
14
  from sklearn.feature_extraction.text import TfidfVectorizer
15
 
16
 
 
17
  # Import the new path manager# Cal
18
  try:
19
  from path_config import path_manager
@@ -210,13 +213,28 @@ def run_initial_training():
210
  log_step("Creating pipeline from existing components...")
211
  try:
212
  # Load existing components
213
- model = joblib.load(model_path)
214
  vectorizer = joblib.load(vectorizer_path)
215
-
216
  # Create pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  pipeline = Pipeline([
218
- ('vectorizer', vectorizer),
219
- ('model', model)
220
  ])
221
 
222
  # Save pipeline
@@ -370,7 +388,7 @@ def run_initial_training():
370
  # Save metadata
371
  metadata = {
372
  "model_version": "v1.0_init",
373
- "model_type": "logistic_regression_pipeline",
374
  "test_accuracy": float(accuracy),
375
  "test_f1": float(f1),
376
  "train_size": len(X_train),
 
7
  from pathlib import Path
8
  from datetime import datetime
9
  from sklearn.pipeline import Pipeline
10
+ from sklearn.ensemble import VotingClassifier
11
  from sklearn.model_selection import cross_validate
12
  from sklearn.linear_model import LogisticRegression
13
+ from sklearn.ensemble import RandomForestClassifier
14
  from sklearn.model_selection import train_test_split
15
  from sklearn.metrics import accuracy_score, f1_score
16
  from sklearn.feature_extraction.text import TfidfVectorizer
17
 
18
 
19
+
20
  # Import the new path manager# Cal
21
  try:
22
  from path_config import path_manager
 
213
  log_step("Creating pipeline from existing components...")
214
  try:
215
  # Load existing components
216
+ # model = joblib.load(model_path)
217
  vectorizer = joblib.load(vectorizer_path)
218
+
219
  # Create pipeline
220
+ # pipeline = Pipeline([
221
+ # ('vectorizer', vectorizer),
222
+ # ('model', model)
223
+ # ])
224
+
225
+ # Create ensemble method pipeline
226
+ # Initialize ensemble model
227
+ ensemble_model = VotingClassifier(
228
+ estimators=[
229
+ ('logistic', LogisticRegression(max_iter=1000, random_state=42, class_weight='balanced')),
230
+ ('random_forest', RandomForestClassifier(n_estimators=50, random_state=42, class_weight='balanced'))
231
+ ],
232
+ voting='soft'
233
+ )
234
+
235
  pipeline = Pipeline([
236
+ ('vectorizer', TfidfVectorizer(...)),
237
+ ('model', ensemble_model) # Use ensemble instead of single model
238
  ])
239
 
240
  # Save pipeline
 
388
  # Save metadata
389
  metadata = {
390
  "model_version": "v1.0_init",
391
+ "model_type": "ensemble_voting_pipeline", # "logistic_regression_pipeline",
392
  "test_accuracy": float(accuracy),
393
  "test_f1": float(f1),
394
  "train_size": len(X_train),