zakyirhab0 committed on
Commit 9f6b986 · verified · 1 Parent(s): 86573e5

Update app.py

Files changed (1)
  1. app.py +41 -41
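
All 41 changed lines are the same mechanical edit: the spaces around "=" are dropped from the repo_id keyword arguments (and from the module-level repo_id assignment at line 41), following the PEP 8 convention that keyword arguments take no spaces around "=" (pycodestyle flags the old form as E251). A minimal before/after sketch using one of the calls this commit touches:

    from huggingface_hub import hf_hub_download

    # Before: spaces around "=" in a keyword argument (pycodestyle E251)
    keywords_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

    # After: PEP 8 spacing for keyword arguments
    keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

Note that for a plain module-level assignment PEP 8 would actually keep the spaces, and the call sites still hardcode the string rather than reusing that repo_id constant.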
app.py CHANGED
@@ -38,7 +38,7 @@ login(token=huggingface_token)
 # Initialize the API
 api = HfApi()

-repo_id = "zakyirhab0/Klasifikasi_Komentar"
+repo_id="zakyirhab0/Klasifikasi_Komentar"

 # === Preprocessing Functions === #
 candidate_list = ["Lalu Muhamad Iqbal", "Indah Dhamayanti Putri", "Zulkieflimansyah", "M Suhaili", "Sitti Rohmi Djalilah", "Musyafirin"]
@@ -110,8 +110,8 @@ def load_dictionary(file_path, file_type='json'):
     df = pd.read_csv(file_path, names=['slang', 'formal'])
     return pd.Series(df['formal'].values, index=df['slang']).to_dict()

-ntb_dict_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="ntb_dict.json", repo_type="dataset")
-slang_dict_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="kamusalay.csv", repo_type="dataset")
+ntb_dict_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="ntb_dict.json", repo_type="dataset")
+slang_dict_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="kamusalay.csv", repo_type="dataset")

 ntb_dict = load_dictionary(ntb_dict_path, 'json')
 slang_dict = load_dictionary(slang_dict_path, 'csv')
@@ -148,14 +148,14 @@ def update_history_json(history_path, komentar, link, model_data, field, date):
     api.upload_file(
         path_or_fileobj="history_temp.json",
         path_in_repo=f"history_{candidate.lower().replace(' ', '_')}.json",
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

 # Function to update the training dataset with new data
 def update_training_dataset(output, candidate):
-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
-    history_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"history_{candidate.lower().replace(' ', '_')}.json", repo_type="dataset")
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

     try:
         required_columns = ['model_data', 'Platform', 'komentar', 'link', 'kandidat', 'sentimen', 'tanggal', 'tanggal_masuk']
@@ -236,7 +236,7 @@ def update_training_dataset(output, candidate):
         api.upload_file(
             path_or_fileobj="dataset_temp.xlsx",
             path_in_repo=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx",
-            repo_id = "zakyirhab0/Klasifikasi_Komentar",
+            repo_id="zakyirhab0/Klasifikasi_Komentar",
             repo_type="dataset"
         )
         st.success(f"Data successfully updated in {candidate}'s training dataset.")
@@ -263,7 +263,7 @@ def save_keywords_to_huggingface(keyword_dict):
     api.upload_file(
         path_or_fileobj=file_name,
         path_in_repo=file_name,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -272,7 +272,7 @@ def save_keywords_to_huggingface(keyword_dict):
 # Load keywords from Hugging Face
 def load_keywords_from_huggingface():
     try:
-        keywords_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
+        keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
         with open(keywords_path, 'r', encoding='utf-8') as f:
             return json.load(f)
     except Exception as e:
@@ -307,7 +307,7 @@ def clustering_based_evaluation(df, n_clusters=10):
     return df

 def load_and_process_data(dataset_path, history_path):
-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
     df = pd.read_excel(dataset_path)
     df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'], errors='coerce')

@@ -384,7 +384,7 @@ file_name = "corrected_comments.xlsx"
 def load_corrected_comments():
     try:
         print("🔄 Trying to load data from Hugging Face...")
-        corrected_comments_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
+        corrected_comments_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
         return pd.read_excel(corrected_comments_path, sheet_name='Corrected Comments')
     except Exception as e:
         print(f"⚠ Failed to load from Hugging Face: {e}")
@@ -408,7 +408,7 @@ def save_corrected_comments(data):
     api.upload_file(
         path_or_fileobj=file_name,
         path_in_repo=file_name,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -445,7 +445,7 @@ def run_clustering_for_ba_lainnya():

     st.write(f"**'BA Lainnya' Data:** {len(ba_lainnya_data)} rows")
     # Load keywords.json from Hugging Face
-    keywords_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
+    keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

     # Open the downloaded file
     with open(keywords_path, 'r') as f:
@@ -460,8 +460,8 @@ def run_clustering_for_ba_lainnya():

     sarcasm_detector = pipeline('sentiment-analysis', model='unitary/toxic-bert')

-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
-    corrected_comments_file = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"corrected_comments_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    corrected_comments_file = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"corrected_comments_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")

     try:
         train_data = pd.read_excel(dataset_path)
@@ -728,7 +728,7 @@ def run_clustering_for_ba_lainnya():
     api.upload_file(
         path_or_fileobj=clustered_data_filename,
         path_in_repo=clustered_data_filename,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )
     st.success(f"✅ Clustered Data successfully uploaded to Hugging Face: {clustered_data_filename}")
@@ -753,7 +753,7 @@ def run_clustering_for_ba_lainnya():
     api.upload_file(
         path_or_fileobj=corrected_comments_file,
         path_in_repo=corrected_comments_file,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )
     st.success(f"✅ Corrected Comments successfully saved & uploaded to Hugging Face: {corrected_comments_file}")
@@ -777,8 +777,8 @@ menu = st.sidebar.radio("Select a Feature", ["Model-Based Classification","Clust
 if menu == "Model-Based Classification":
     st.title("Model-Based Classification")
     candidate = st.selectbox("Choose a candidate:", candidate_list)
-    model_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"best_rf_model_{candidate.replace(' ', '_').lower()}.joblib", repo_type="dataset")
-    vectorizer_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"tfidf_vectorizer_{candidate.replace(' ', '_').lower()}.joblib", repo_type="dataset")
+    model_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"best_rf_model_{candidate.replace(' ', '_').lower()}.joblib", repo_type="dataset")
+    vectorizer_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"tfidf_vectorizer_{candidate.replace(' ', '_').lower()}.joblib", repo_type="dataset")

     # Save the selected candidate to session state
     st.session_state['candidate'] = candidate
@@ -927,7 +927,7 @@ if menu == "Model-Based Classification":
     api.upload_file(
         path_or_fileobj=classification_results_file,
         path_in_repo=classification_results_file,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -953,7 +953,7 @@ if menu == "Model-Based Classification":
     api.upload_file(
         path_or_fileobj=training_dataset_file,
         path_in_repo=training_dataset_file,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -980,12 +980,12 @@ if menu == "View Training Dataset":
     selected_candidate = st.selectbox("Choose a candidate:", list(candidate_list), key='candidate_select_view')

     # Dataset paths
-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
-    history_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

     # Load the dataset
     try:
-        dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+        dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
         df = pd.read_excel(dataset_path)

         # Ensure the required columns exist
@@ -1097,7 +1097,7 @@ if menu == "View Training Dataset":
     st.subheader("History of Data Additions")

     try:
-        history_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")
+        history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

         with open(history_path, "r") as f:
             history = json.load(f)
@@ -1138,7 +1138,7 @@ if menu == "View Training Dataset":
     api.upload_file(
         path_or_fileobj=filtered_dataset_file,
         path_in_repo=filtered_dataset_file,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -1161,7 +1161,7 @@ if menu == "View Training Dataset":
     api.upload_file(
         path_or_fileobj=full_dataset_file,
         path_in_repo=full_dataset_file,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -1212,7 +1212,7 @@ if menu == "View Training Dataset":
     api.upload_file(
         path_or_fileobj=aggregated_dataset_file,
         path_in_repo=aggregated_dataset_file,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -1313,12 +1313,12 @@ if menu == "Evaluate Data Train":

     try:
         # **📥 Load the dataset from Hugging Face**
-        dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+        dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
         df = pd.read_excel(dataset_path)

         # **✅ Load the existing keyword dictionary from Hugging Face**
         try:
-            keywords_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
+            keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
             with open(keywords_path, 'r', encoding="utf-8") as f:
                 keyword_dict = json.load(f)

@@ -1461,7 +1461,7 @@ if menu == "Evaluate Data Train":
     api.upload_file(
         path_or_fileobj=file_name,
         path_in_repo=file_name,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )

@@ -1473,7 +1473,7 @@ if menu == "Evaluate Data Train":
     # **4️⃣ Handle FileNotFoundError & other errors**
     try:
         # Load dataset
-        dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
+        dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
         df = pd.read_json(dataset_path, lines=True)
         st.dataframe(df)
     except FileNotFoundError:
@@ -1487,12 +1487,12 @@ if menu == "Retraining Model":
     selected_candidate = st.selectbox("Select a candidate to retrain the model:", list(candidate_list))

     # Load the model & vectorizer from the Hugging Face Hub
-    model_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"best_rf_model_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")
-    vectorizer_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"tfidf_vectorizer_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")
+    model_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"best_rf_model_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")
+    vectorizer_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"tfidf_vectorizer_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")

     # Load the dataset from Hugging Face
-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
-    retrain_history_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"retrain_history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")  # If you want to store the retrain history locally
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    retrain_history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"retrain_history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")  # If you want to store the retrain history locally

     try:
         # Load dataset
@@ -1627,7 +1627,7 @@ if menu == "Maximize Preprocessing":

     # Load Dataset Train
     candidate = st.selectbox("Choose a candidate:", list(candidate_list))
-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
     try:
         # Load dataset
         data = pd.read_excel(dataset_path)
@@ -1716,7 +1716,7 @@ if menu == "Maximize Preprocessing":
     api.upload_file(
         path_or_fileobj=kamus_alay_filename,
         path_in_repo=kamus_alay_filename,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )
     st.success(f"✅ Kamus Alay successfully saved and uploaded to Hugging Face!")
@@ -1735,7 +1735,7 @@ if menu == "Maximize Preprocessing":
     api.upload_file(
         path_or_fileobj=kamus_ntb_filename,
         path_in_repo=kamus_ntb_filename,
-        repo_id = "zakyirhab0/Klasifikasi_Komentar",
+        repo_id="zakyirhab0/Klasifikasi_Komentar",
         repo_type="dataset"
     )
     st.success(f"✅ Kamus NTB successfully saved and uploaded to Hugging Face!")
@@ -1753,7 +1753,7 @@ if menu == "Update Keywords":

     # Load existing keyword dictionary from Hugging Face
     try:
-        keywords_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
+        keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

         with open(keywords_path, 'r', encoding='utf-8') as f:
             keyword_dict = json.load(f)
@@ -1825,7 +1825,7 @@ if menu == "Update Keywords":

     # Analyze Training Data
     st.subheader("Analyze Training Data")
-    dataset_path = hf_hub_download(repo_id = "zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+    dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
     try:
         train_data = pd.read_excel(dataset_path)
         if train_data.empty:
 