zakyirhab0 committed
Update app.py
app.py CHANGED
@@ -38,7 +38,7 @@ login(token=huggingface_token)
 # Inisialisasi API
 api = HfApi()

-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar"

 # === Preprocessing Functions === #
 candidate_list = ["Lalu Muhamad Iqbal", "Indah Dhamayanti Putri", "Zulkieflimansyah", "M Suhaili", "Sitti Rohmi Djalilah", "Musyafirin"]
@@ -110,8 +110,8 @@ def load_dictionary(file_path, file_type='json'):
 df = pd.read_csv(file_path, names=['slang', 'formal'])
 return pd.Series(df['formal'].values, index=df['slang']).to_dict()

-ntb_dict_path = hf_hub_download(repo_id
-slang_dict_path = hf_hub_download(repo_id
+ntb_dict_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="ntb_dict.json", repo_type="dataset")
+slang_dict_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="kamusalay.csv", repo_type="dataset")
 ntb_dict = load_dictionary(ntb_dict_path, 'json')
 slang_dict = load_dictionary(slang_dict_path, 'csv')
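Note: every download added by this commit repeats the same repo_id and repo_type="dataset". A minimal sketch of a shared helper that would centralize those arguments (illustrative only, not part of this commit; the helper name is assumed):

from huggingface_hub import hf_hub_download

REPO_ID = "zakyirhab0/Klasifikasi_Komentar"  # dataset repo hard-coded throughout app.py

def download_from_dataset(filename: str) -> str:
    # Fetch one file from the shared dataset repo and return its local cached path.
    return hf_hub_download(repo_id=REPO_ID, filename=filename, repo_type="dataset")

# e.g. ntb_dict_path = download_from_dataset("ntb_dict.json")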
@@ -148,14 +148,14 @@ def update_history_json(history_path, komentar, link, model_data, field, date):
 api.upload_file(
 path_or_fileobj="history_temp.json",
 path_in_repo=f"history_{candidate.lower().replace(' ', '_')}.json",
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

 # Fungsi untuk memperbarui dataset pelatihan dengan data baru
 def update_training_dataset(output, candidate):
-dataset_path = hf_hub_download(repo_id
-history_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

 try:
 required_columns = ['model_data', 'Platform', 'komentar', 'link', 'kandidat', 'sentimen', 'tanggal', 'tanggal_masuk']
@@ -236,7 +236,7 @@ def update_training_dataset(output, candidate):
 api.upload_file(
 path_or_fileobj="dataset_temp.xlsx",
 path_in_repo=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx",
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Data successfully updated in {candidate}'s training dataset.")
@@ -263,7 +263,7 @@ def save_keywords_to_huggingface(keyword_dict):
 api.upload_file(
 path_or_fileobj=file_name,
 path_in_repo=file_name,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -272,7 +272,7 @@ def save_keywords_to_huggingface(keyword_dict):
 # Load keywords dari Hugging Face
 def load_keywords_from_huggingface():
 try:
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
 with open(keywords_path, 'r', encoding='utf-8') as f:
 return json.load(f)
 except Exception as e:
@@ -307,7 +307,7 @@ def clustering_based_evaluation(df, n_clusters=10):
 return df

 def load_and_process_data(dataset_path, history_path):
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 df = pd.read_excel(dataset_path)
 df['tanggal_masuk'] = pd.to_datetime(df['tanggal_masuk'], errors='coerce')

@@ -384,7 +384,7 @@ file_name = "corrected_comments.xlsx"
 def load_corrected_comments():
 try:
 print("Mencoba memuat data dari Hugging Face...")
-corrected_comments_path = hf_hub_download(repo_id
+corrected_comments_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
 return pd.read_excel(corrected_comments_path, sheet_name='Corrected Comments')
 except Exception as e:
 print(f"Gagal memuat dari Hugging Face: {e}")
@@ -408,7 +408,7 @@ def save_corrected_comments(data):
 api.upload_file(
 path_or_fileobj=file_name,
 path_in_repo=file_name,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -445,7 +445,7 @@ def run_clustering_for_ba_lainnya():

 st.write(f"**'BA Lainnya' Data:** {len(ba_lainnya_data)} rows")
 # Load keywords.json dari Hugging Face
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

 # Buka file yang telah diunduh
 with open(keywords_path, 'r') as f:
@@ -460,8 +460,8 @@ def run_clustering_for_ba_lainnya():

 sarcasm_detector = pipeline('sentiment-analysis', model='unitary/toxic-bert')

-dataset_path = hf_hub_download(repo_id
-corrected_comments_file = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+corrected_comments_file = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"corrected_comments_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")

 try:
 train_data = pd.read_excel(dataset_path)
@@ -728,7 +728,7 @@ def run_clustering_for_ba_lainnya():
 api.upload_file(
 path_or_fileobj=clustered_data_filename,
 path_in_repo=clustered_data_filename,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Clustered Data berhasil diunggah ke Hugging Face: {clustered_data_filename}")
@@ -753,7 +753,7 @@ def run_clustering_for_ba_lainnya():
 api.upload_file(
 path_or_fileobj=corrected_comments_file,
 path_in_repo=corrected_comments_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Corrected Comments berhasil disimpan & diunggah ke Hugging Face: {corrected_comments_file}")
@@ -777,8 +777,8 @@ menu = st.sidebar.radio("Select a Feature", ["Model-Based Classification","Clust
|
|
777 |
if menu == "Model-Based Classification":
|
778 |
st.title("Model-Based Classification")
|
779 |
candidate = st.selectbox("Choose a candidate:", candidate_list)
|
780 |
-
model_path = hf_hub_download(repo_id
|
781 |
-
vectorizer_path = hf_hub_download(repo_id
|
782 |
|
783 |
# Save the selected candidate to session state
|
784 |
st.session_state['candidate'] = candidate
|
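Note: the hunk above downloads per-candidate .joblib artifacts, which by their filenames appear to be a TF-IDF vectorizer and a random-forest classifier. A minimal sketch of how the downloaded paths would typically be used (illustrative only; the scikit-learn model type and the sample comment are assumptions, not confirmed by the diff):

import joblib

# Load the per-candidate artifacts from their cached local paths.
vectorizer = joblib.load(vectorizer_path)   # TF-IDF vectorizer
model = joblib.load(model_path)             # trained classifier (rf per the filename)

# Vectorize a preprocessed comment and predict its sentiment label.
features = vectorizer.transform(["contoh komentar yang sudah dibersihkan"])
predicted_sentiment = model.predict(features)[0]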
@@ -927,7 +927,7 @@ if menu == "Model-Based Classification":
 api.upload_file(
 path_or_fileobj=classification_results_file,
 path_in_repo=classification_results_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -953,7 +953,7 @@ if menu == "Model-Based Classification":
 api.upload_file(
 path_or_fileobj=training_dataset_file,
 path_in_repo=training_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -980,12 +980,12 @@ if menu == "View Training Dataset":
 selected_candidate = st.selectbox("Choose a candidate:", list(candidate_list), key='candidate_select_view')

 # Path dataset
-dataset_path = hf_hub_download(repo_id
-history_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

 # Memuat dataset
 try:
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 df = pd.read_excel(dataset_path)

 # Memastikan kolom yang diperlukan ada
@@ -1097,7 +1097,7 @@ if menu == "View Training Dataset":
 st.subheader("History of Data Additions")

 try:
-history_path = hf_hub_download(repo_id
+history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")

 with open(history_path, "r") as f:
 history = json.load(f)
@@ -1138,7 +1138,7 @@ if menu == "View Training Dataset":
 api.upload_file(
 path_or_fileobj=filtered_dataset_file,
 path_in_repo=filtered_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1161,7 +1161,7 @@ if menu == "View Training Dataset":
 api.upload_file(
 path_or_fileobj=full_dataset_file,
 path_in_repo=full_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1212,7 +1212,7 @@ if menu == "View Training Dataset":
 api.upload_file(
 path_or_fileobj=aggregated_dataset_file,
 path_in_repo=aggregated_dataset_file,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1313,12 +1313,12 @@ if menu == "Evaluate Data Train":

 try:
 # **Load Dataset dari Hugging Face**
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 df = pd.read_excel(dataset_path)

 # **Load existing keyword dictionary dari Hugging Face**
 try:
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")
 with open(keywords_path, 'r', encoding="utf-8") as f:
 keyword_dict = json.load(f)

@@ -1461,7 +1461,7 @@ if menu == "Evaluate Data Train":
 api.upload_file(
 path_or_fileobj=file_name,
 path_in_repo=file_name,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )

@@ -1473,7 +1473,7 @@ if menu == "Evaluate Data Train":
 # **4️⃣ Menangani FileNotFoundError & Error Lainnya**
 try:
 # Load dataset
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=file_name, repo_type="dataset")
 df = pd.read_json(dataset_path, lines=True)
 st.dataframe(df)
 except FileNotFoundError:
@@ -1487,12 +1487,12 @@ if menu == "Retraining Model":
 selected_candidate = st.selectbox("Select a candidate to retrain the model:", list(candidate_list))

 # Load model & vectorizer dari Hugging Face Hub
-model_path = hf_hub_download(repo_id
-vectorizer_path = hf_hub_download(repo_id
+model_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"best_rf_model_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")
+vectorizer_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"tfidf_vectorizer_{selected_candidate.lower().replace(' ', '_')}.joblib", repo_type="dataset")

 # Load dataset dari Hugging Face
-dataset_path = hf_hub_download(repo_id
-retrain_history_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{selected_candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
+retrain_history_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"retrain_history_{selected_candidate.lower().replace(' ', '_')}.json", repo_type="dataset")  # Jika ingin menyimpan history retrain secara lokal

 try:
 # Load dataset
@@ -1627,7 +1627,7 @@ if menu == "Maximize Preprocessing":

 # Load Dataset Train
 candidate = st.selectbox("Choose a candidate:", list(candidate_list))
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 try:
 # Load dataset
 data = pd.read_excel(dataset_path)
@@ -1716,7 +1716,7 @@ if menu == "Maximize Preprocessing":
 api.upload_file(
 path_or_fileobj=kamus_alay_filename,
 path_in_repo=kamus_alay_filename,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Kamus Alay successfully saved and uploaded to Hugging Face!")
@@ -1735,7 +1735,7 @@ if menu == "Maximize Preprocessing":
 api.upload_file(
 path_or_fileobj=kamus_ntb_filename,
 path_in_repo=kamus_ntb_filename,
-repo_id
+repo_id="zakyirhab0/Klasifikasi_Komentar",
 repo_type="dataset"
 )
 st.success(f"Kamus NTB successfully saved and uploaded to Hugging Face!")
@@ -1753,7 +1753,7 @@ if menu == "Update Keywords":

 # Load existing keyword dictionary from Hugging Face
 try:
-keywords_path = hf_hub_download(repo_id
+keywords_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename="keywords.json", repo_type="dataset")

 with open(keywords_path, 'r', encoding='utf-8') as f:
 keyword_dict = json.load(f)
@@ -1825,7 +1825,7 @@ if menu == "Update Keywords":

 # Analyze Training Data
 st.subheader("Analyze Training Data")
-dataset_path = hf_hub_download(repo_id
+dataset_path = hf_hub_download(repo_id="zakyirhab0/Klasifikasi_Komentar", filename=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx", repo_type="dataset")
 try:
 train_data = pd.read_excel(dataset_path)
 if train_data.empty:
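Note: every upload site in this commit follows the same pattern: write a temporary local file, then push it to the dataset repo under a per-candidate filename. A minimal sketch of that pattern (illustrative only; the helper name and the DataFrame argument are assumptions):

import pandas as pd
from huggingface_hub import HfApi

api = HfApi()
REPO_ID = "zakyirhab0/Klasifikasi_Komentar"  # dataset repo hard-coded by this commit

def upload_candidate_dataset(df: pd.DataFrame, candidate: str) -> None:
    # Write the DataFrame to a temporary Excel file, then push it to the Hub.
    local_file = "dataset_temp.xlsx"
    df.to_excel(local_file, index=False)
    api.upload_file(
        path_or_fileobj=local_file,
        path_in_repo=f"datasetntbnew_{candidate.lower().replace(' ', '_')}.xlsx",
        repo_id=REPO_ID,
        repo_type="dataset",
    )

Keeping the candidate name in the f-string filenames lets a single dataset repo hold a separate training dataset, history file, and model artifact per candidate.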