Dada80 committed on
Commit
1451ade
·
verified ·
1 Parent(s): 6f0d730

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +45 -0
README.md CHANGED
@@ -33,3 +33,48 @@ This model classifies news headlines as either NBC or Fox News.
33
 
34
  - Accuracy Score
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  - Accuracy Score
35
 
36
+ ### Model Evaluation
37
+ ```python
38
+ import pandas as pd
39
+ import joblib
40
+ from huggingface_hub import hf_hub_download
41
+ from sklearn.feature_extraction.text import TfidfVectorizer
42
+ from sklearn.metrics import classification_report
43
+
44
+ # Mount Google Drive (Colab) so the test CSV can be read
45
+ from google.colab import drive
46
+ drive.mount('/content/drive')
47
+
48
+ # Load test set
49
+ test_df = pd.read_csv("/content/drive/MyDrive/test_data_random_subset.csv", encoding="Windows-1252")
50
+
51
+ # Log in with a Hugging Face access token
52
+ # token: <YOUR_HF_TOKEN> (paste your own token at the login prompt; never commit a real token)
53
+ !huggingface-cli login
54
+
55
+ # Download the model
56
+ model = hf_hub_download(repo_id = "CIS5190FinalProj/RandomForest", filename = "best_rf_model.pkl")
57
+
58
+ # Download the vectorizer
59
+ tfidf_vectorizer = hf_hub_download(repo_id = "CIS5190FinalProj/RandomForest", filename = "tfidf_vectorizer.pkl")
60
+
61
+ # Load the model
62
+ pipeline = joblib.load(model)
63
+
64
+ # Load the vectorizer
65
+ tfidf_vectorizer = joblib.load(tfidf_vectorizer)
66
+
67
+ # Extract the headlines from the test set
68
+ X_test = test_df['title']
69
+
70
+ # Transform the headlines into numerical TF-IDF features
71
+ X_test_transformed = tfidf_vectorizer.transform(X_test)
72
+
73
+ # Make predictions using the pipeline
74
+ y_pred = pipeline.predict(X_test_transformed)
75
+
76
+ # Extract the 'label' column as the target
77
+ y_test = test_df['label']
78
+
79
+ # Print classification report
80
+ print(classification_report(y_test, y_pred))