zakyirhab0 committed
Update app.py
app.py CHANGED
@@ -38,7 +38,7 @@ login(token=huggingface_token)
 # Inisialisasi API
 api = HfApi()

-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar"

 # === Preprocessing Functions === #
 candidate_list = ["Lalu Muhamad Iqbal", "Indah Dhamayanti Putri", "Zulkieflimansyah", "M Suhaili", "Sitti Rohmi Djalilah", "Musyafirin"]
@@ -110,8 +110,8 @@ def load_dictionary(file_path, file_type='json'):
 df = pd.read_csv(file_path, names=['slang', 'formal'])
 return pd.Series(df['formal'].values, index=df['slang']).to_dict()

-ntb_dict_path = hf_hub_download(repo_id
-slang_dict_path = hf_hub_download(repo_id
+ntb_dict_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="ntb_dict.json", repo_type="dataset")
+slang_dict_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="kamusalay.csv", repo_type="dataset")
 ntb_dict = load_dictionary(ntb_dict_path, 'json')
 slang_dict = load_dictionary(slang_dict_path, 'csv')
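Note: every download added by this commit repeats the same repo_id and repo_type="dataset". A minimal sketch of a shared helper that would centralize those arguments (illustrative only, not part of this commit; the helper name is assumed):

from huggingface_hub import hf_hub_download

REPO_ID = "zakyirhab0/Klasifikasi_Komentar"  # dataset repo hard-coded throughout app.py

def download_from_dataset(filename: str) -> str:
    # Fetch one file from the shared dataset repo and return its local cached path.
    return hf_hub_download(repo_id=REPO_ID, filename=filename, repo_type="dataset")

# e.g. ntb_dict_path = download_from_dataset("ntb_dict.json")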
@@ -148,14 +148,14 @@ def update_history_json(history_path, komentar, link, model_data, field, date):
 api.upload_file(
 path_or_fileobj="history_temp.json",
 path_in_repo=f"history_{candidate.lower().replace(' ', '_')}.json",
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

 # Fungsi untuk memperbarui dataset pelatihan dengan data baru
 def update_training_dataset(output, candidate):
-dataset_path = hf_hub_download(repo_id
-history_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

 try:
 required_columns = ['model_data', 'Platform', 'komentar', 'link', 'kandidat', 'sentimen', 'tanggal', 'tanggal_masuk']
@@ -236,7 +236,7 @@ def update_training_dataset(output, candidate):
 api.upload_file(
 path_or_fileobj="dataset_temp.xlsx",
 path_in_repo=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx",
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Data successfully updated in {candidate}'s training dataset.")
@@ -263,7 +263,7 @@ def save_keywords_to_huggingface(keyword_dict):
 api.upload_file(
 path_or_fileobj=file_name,
 path_in_repo=file_name,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -272,7 +272,7 @@ def save_keywords_to_huggingface(keyword_dict):
 # Load keywords dari Hugging Face
 def load_keywords_from_huggingface():
 try:
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
 with open(keywords_path, 'r', encoding='utf-8') as f:
 return json.load(f)
 except Exception as e:
@@ -307,7 +307,7 @@ def clustering_based_evaluation(df, n_clusters=10):
 return df

 def load_and_process_data(dataset_path, history_path):
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 df = pd.read_excel(dataset_path)
 df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'], errors='coerce')

@@ -384,7 +384,7 @@ file_name = "corrected_comments.xlsx"
 def load_corrected_comments():
 try:
 print("Mencoba memuat data dari Hugging Face...")
-corrected_comments_path = hf_hub_download(repo_id
+corrected_comments_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
 return pd.read_excel(corrected_comments_path, sheet_name='Corrected Comments')
 except Exception as e:
 print(f"Gagal memuat dari Hugging Face: {e}")
@@ -408,7 +408,7 @@ def save_corrected_comments(data):
 api.upload_file(
 path_or_fileobj=file_name,
 path_in_repo=file_name,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -445,7 +445,7 @@ def run_clustering_for_ba_lainnya():

 st.write(f"**'BA Lainnya' Data:** {len(ba_lainnya_data)} rows")
 # Load keywords.json dari Hugging Face
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

 # Buka file yang telah diunduh
 with open(keywords_path, 'r') as f:
@@ -460,8 +460,8 @@ def run_clustering_for_ba_lainnya():

 sarcasm_detector = pipeline('sentiment-analysis', model='unitary/toxic-bert')

-dataset_path = hf_hub_download(repo_id
-corrected_comments_file = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+corrected_comments_file = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"corrected_comments_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")

 try:
 train_data = pd.read_excel(dataset_path)
@@ -728,7 +728,7 @@ def run_clustering_for_ba_lainnya():
 api.upload_file(
 path_or_fileobj=clustered_data_filename,
 path_in_repo=clustered_data_filename,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Clustered Data berhasil diunggah ke Hugging Face: {clustered_data_filename}")
@@ -753,7 +753,7 @@ def run_clustering_for_ba_lainnya():
 api.upload_file(
 path_or_fileobj=corrected_comments_file,
 path_in_repo=corrected_comments_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Corrected Comments berhasil disimpan & diunggah ke Hugging Face: {corrected_comments_file}")
@@ -777,8 +777,8 @@ menu = st.sidebar.radio("Select a Feature", ["Model-Based Classification","Clust
|
|
777 |
if menu == "Model-Based Classification":
|
778 |
st.title("Model-Based Classification")
|
779 |
candidate = st.selectbox("Choose a candidate:", candidate_list)
|
780 |
-
model_path = hf_hub_download(repo_id
|
781 |
-
vectorizer_path = hf_hub_download(repo_id
|
782 |
|
783 |
# Save the selected candidate to session state
|
784 |
st.session_state['candidate'] = candidate
|
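Note: the hunk above downloads per-candidate .joblib artifacts, which by their filenames appear to be a TF-IDF vectorizer and a random-forest classifier. A minimal sketch of how the downloaded paths would typically be used (illustrative only; the scikit-learn model type and the sample comment are assumptions, not confirmed by the diff):

import joblib

# Load the per-candidate artifacts from their cached local paths.
vectorizer = joblib.load(vectorizer_path)   # TF-IDF vectorizer
model = joblib.load(model_path)             # trained classifier (rf per the filename)

# Vectorize a preprocessed comment and predict its sentiment label.
features = vectorizer.transform(["contoh komentar yang sudah dibersihkan"])
predicted_sentiment = model.predict(features)[0]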
@@ -927,7 +927,7 @@ if menu == "Model-Based Classification":
 api.upload_file(
 path_or_fileobj=classification_results_file,
 path_in_repo=classification_results_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -953,7 +953,7 @@ if menu == "Model-Based Classification":
 api.upload_file(
 path_or_fileobj=training_dataset_file,
 path_in_repo=training_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -980,12 +980,12 @@ if menu == "View Training Dataset":
 selected_candidate = st.selectbox("Choose a candidate:", list(candidate_list), key='candidate_select_view')

 # Path dataset
-dataset_path = hf_hub_download(repo_id
-history_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

 # Memuat dataset
 try:
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 df = pd.read_excel(dataset_path)

 # Memastikan kolom yang diperlukan ada
@@ -1097,7 +1097,7 @@ if menu == "View Training Dataset":
 st.subheader("History of Data Additions")

 try:
-history_path = hf_hub_download(repo_id
+history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

 with open(history_path, "r") as f:
 history = json.load(f)
@@ -1138,7 +1138,7 @@ if menu == "View Training Dataset":
 api.upload_file(
 path_or_fileobj=filtered_dataset_file,
 path_in_repo=filtered_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1161,7 +1161,7 @@ if menu == "View Training Dataset":
 api.upload_file(
 path_or_fileobj=full_dataset_file,
 path_in_repo=full_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1212,7 +1212,7 @@ if menu == "View Training Dataset":
 api.upload_file(
 path_or_fileobj=aggregated_dataset_file,
 path_in_repo=aggregated_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1313,12 +1313,12 @@ if menu == "Evaluate Data Train":

 try:
 # **Load Dataset dari Hugging Face**
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 df = pd.read_excel(dataset_path)

 # **Load existing keyword dictionary dari Hugging Face**
 try:
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
 with open(keywords_path, 'r', encoding="utf-8") as f:
 keyword_dict = json.load(f)

@@ -1461,7 +1461,7 @@ if menu == "Evaluate Data Train":
 api.upload_file(
 path_or_fileobj=file_name,
 path_in_repo=file_name,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1473,7 +1473,7 @@ if menu == "Evaluate Data Train":
 # **4️⃣ Menangani FileNotFoundError & Error Lainnya**
 try:
 # Load dataset
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
 df = pd.read_json(dataset_path, lines=True)
 st.dataframe(df)
 except FileNotFoundError:
@@ -1487,12 +1487,12 @@ if menu == "Retraining Model":
 selected_candidate = st.selectbox("Select a candidate to retrain the model:", list(candidate_list))

 # Load model & vectorizer dari Hugging Face Hub
-model_path = hf_hub_download(repo_id
-vectorizer_path = hf_hub_download(repo_id
+model_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"best_rf_model_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")
+vectorizer_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"tfidf_vectorizer_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")

 # Load dataset dari Hugging Face
-dataset_path = hf_hub_download(repo_id
-retrain_history_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+retrain_history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"retrain_history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")  # Jika ingin menyimpan history retrain secara lokal

 try:
 # Load dataset
@@ -1627,7 +1627,7 @@ if menu == "Maximize Preprocessing":

 # Load Dataset Train
 candidate = st.selectbox("Choose a candidate:", list(candidate_list))
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 try:
 # Load dataset
 data = pd.read_excel(dataset_path)
@@ -1716,7 +1716,7 @@ if menu == "Maximize Preprocessing":
 api.upload_file(
 path_or_fileobj=kamus_alay_filename,
 path_in_repo=kamus_alay_filename,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Kamus Alay successfully saved and uploaded to Hugging Face!")
@@ -1735,7 +1735,7 @@ if menu == "Maximize Preprocessing":
 api.upload_file(
 path_or_fileobj=kamus_ntb_filename,
 path_in_repo=kamus_ntb_filename,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Kamus NTB successfully saved and uploaded to Hugging Face!")
@@ -1753,7 +1753,7 @@ if menu == "Update Keywords":

 # Load existing keyword dictionary from Hugging Face
 try:
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

 with open(keywords_path, 'r', encoding='utf-8') as f:
 keyword_dict = json.load(f)
@@ -1825,7 +1825,7 @@ if menu == "Update Keywords":

 # Analyze Training Data
 st.subheader("Analyze Training Data")
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 try:
 train_data = pd.read_excel(dataset_path)
 if train_data.empty:
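Note: every upload site in this commit follows the same pattern: write a temporary local file, then push it to the dataset repo under a per-candidate filename. A minimal sketch of that pattern (illustrative only; the helper name and the DataFrame argument are assumptions):

import pandas as pd
from huggingface_hub import HfApi

api = HfApi()
REPO_ID = "zakyirhab0/Klasifikasi_Komentar"  # dataset repo hard-coded by this commit

def upload_candidate_dataset(df: pd.DataFrame, candidate: str) -> None:
    # Write the DataFrame to a temporary Excel file, then push it to the Hub.
    local_file = "dataset_temp.xlsx"
    df.to_excel(local_file, index=False)
    api.upload_file(
        path_or_fileobj=local_file,
        path_in_repo=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx",
        repo_id=REPO_ID,
        repo_type="dataset",
    )

Keeping the candidate name in the f-string filenames lets a single dataset repo hold a separate training dataset, history file, and model artifact per candidate.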