Spaces:
Paused
Paused
# Import Library yang Diperlukan | |
import gradio as gr | |
import gspread | |
from oauth2client.service_account import ServiceAccountCredentials | |
from llama_cpp import Llama | |
from llama_index.core import VectorStoreIndex, Settings | |
from llama_index.core.node_parser import SentenceSplitter | |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
from llama_index.llms.llama_cpp import LlamaCPP | |
from huggingface_hub import hf_hub_download | |
from llama_index.core.llms import ChatMessage | |
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine | |
from llama_index.core.schema import Document | |
# =================================== | |
# 1️⃣ Fungsi untuk Membaca Google Spreadsheet dari Beberapa Worksheet | |
# =================================== | |
def read_google_sheets(
    spreadsheet_id="1e_cNMhwF-QYpyYUpqQh-XCw-OdhWS6EuYsoBUsVtdNg",
    sheet_names=("datatarget", "datacuti", "dataabsen", "datalembur", "pkb"),
    credentials_path="credentials.json",
):
    """Read several worksheets of one Google Spreadsheet into a single string.

    Each worksheet contributes a ``=== Data dari <NAME> ===`` header line,
    followed by one line per row with the cells joined by ``" | "``, followed
    by a separator entry. A worksheet that does not exist contributes a single
    error line instead, and the remaining worksheets are still read.

    Failures are reported in the return value rather than raised: callers
    feed the returned text straight into the index, so this function always
    returns a string.

    Args:
        spreadsheet_id: Key of the spreadsheet to open. Defaults to the
            spreadsheet this app was written for.
        sheet_names: Worksheet titles to read, in output order.
        credentials_path: Path to the service-account JSON key file.

    Returns:
        The combined worksheet text, or a ``"❌ ERROR: ..."`` message when the
        spreadsheet cannot be found or any other exception occurs.
    """
    # Access scopes for Google Sheets and Google Drive.
    scope = [
        "https://www.googleapis.com/auth/spreadsheets",
        "https://www.googleapis.com/auth/drive",
    ]
    try:
        creds = ServiceAccountCredentials.from_json_keyfile_name(credentials_path, scope)
        client = gspread.authorize(creds)
        # One spreadsheet is shared by every worksheet, so open it only once.
        spreadsheet = client.open_by_key(spreadsheet_id)

        all_data = []
        for sheet_name in sheet_names:
            try:
                rows = spreadsheet.worksheet(sheet_name).get_all_values()
            except gspread.exceptions.WorksheetNotFound:
                all_data.append(f"❌ ERROR: Worksheet {sheet_name} tidak ditemukan.")
                continue
            # Label the rows with their worksheet so the sources stay distinguishable.
            all_data.append(f"=== Data dari {sheet_name.upper()} ===")
            # Assumes every cell value is a string (str.join requires it).
            all_data.extend(" | ".join(row) for row in rows)
            # Extra newline entry leaves a visible gap between worksheets.
            all_data.append("\n")

        return "\n".join(all_data)
    except gspread.exceptions.SpreadsheetNotFound:
        return "❌ ERROR: Spreadsheet tidak ditemukan. Pastikan ID/nama benar!"
    except Exception as e:
        # Deliberately broad: any failure becomes an error string for the caller.
        return f"❌ ERROR: {str(e)}"
# =================================== | |
# 2️⃣ Fungsi untuk Mengunduh Model Llama | |
# =================================== | |
def initialize_llama_model():
    """Download the Zephyr 7B GGUF model file and return its local path.

    The file ``zephyr-7b-beta.Q4_K_M.gguf`` is requested from the Hugging Face
    Hub with ``./models`` as the cache directory.

    Returns:
        Whatever ``hf_hub_download`` returns for the requested file; callers
        use it as the model path.
    """
    # NOTE(review): the namespace is usually spelled "TheBloke"; confirm the
    # Hub resolves this casing before relying on it.
    download_kwargs = {
        "repo_id": "TheBLoke/zephyr-7b-beta-GGUF",
        "filename": "zephyr-7b-beta.Q4_K_M.gguf",
        "cache_dir": "./models",
    }
    return hf_hub_download(**download_kwargs)
# =================================== | |
# 3️⃣ Inisialisasi Model dan Pengaturan | |
# =================================== | |
def initialize_settings(model_path):
    """Install a llama.cpp-backed LLM as the global ``Settings.llm``.

    Args:
        model_path: Path to the model file to load, e.g. the value returned
            by ``initialize_llama_model``.
    """
    llm = LlamaCPP(model_path=model_path, temperature=0.7)
    Settings.llm = llm
# =================================== | |
# 4️⃣ Inisialisasi Index dari Data Spreadsheet | |
# =================================== | |
def initialize_index():
    """Build a vector index over the text read from the Google Spreadsheet.

    Also sets the global ``Settings.embed_model`` to the multilingual MiniLM
    sentence-transformers model before the index is constructed.

    Returns:
        A ``VectorStoreIndex`` built from the spreadsheet text, split into
        chunks of size 150 with an overlap of 10.
    """
    sheet_text = read_google_sheets()

    splitter = SentenceSplitter(chunk_size=150, chunk_overlap=10)
    nodes = splitter.get_nodes_from_documents([Document(text=sheet_text)])

    Settings.embed_model = HuggingFaceEmbedding(
        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
    )
    return VectorStoreIndex(nodes)
# =================================== | |
# 5️⃣ Inisialisasi Mesin Chatbot | |
# =================================== | |
def initialize_chat_engine(index):
    """Create a condense-plus-context chat engine on top of ``index``.

    Args:
        index: Index object exposing ``as_retriever``; the retriever is
            configured with ``similarity_top_k=3``.

    Returns:
        A ``CondensePlusContextChatEngine`` created with verbose output enabled.
    """
    return CondensePlusContextChatEngine.from_defaults(
        retriever=index.as_retriever(similarity_top_k=3),
        verbose=True,
    )
# =================================== | |
# 6️⃣ Fungsi untuk Menghasilkan Respons Chatbot | |
# =================================== | |
def generate_response(message, history, chat_engine):
    """Answer ``message`` from the spreadsheet data and record it in ``history``.

    The spreadsheet is re-read and a new index and chat engine are built on
    every call (presumably so answers reflect the latest sheet contents;
    confirm that this per-message cost is intended). Because the engine is
    new each time and ``history`` is not passed to it, earlier turns are not
    available to the model.

    The system prompt instructing the model to answer only in Indonesian is
    passed to the engine through ``system_prompt``; previously it was built
    but never used.

    Args:
        message: The user's question.
        history: List of ``(message, reply)`` pairs, or ``None`` to start a
            new list. It is mutated in place.
        chat_engine: Accepted for interface compatibility but not used; the
            engine built inside this function is used instead.

    Returns:
        ``history`` with ``(message, reply_text)`` appended.
    """
    if history is None:
        history = []

    # Rebuild the index from a new read of the spreadsheet. This relies on
    # Settings.embed_model having been configured already (initialize_index
    # does that); it is not set here.
    text_data = read_google_sheets()
    parser = SentenceSplitter(chunk_size=150, chunk_overlap=10)
    nodes = parser.get_nodes_from_documents([Document(text=text_data)])
    index = VectorStoreIndex(nodes)
    retriever = index.as_retriever(similarity_top_k=3)

    system_prompt = (
        "Anda adalah chatbot yang dirancang khusus untuk berbicara dalam Bahasa Indonesia. "
        "Anda tidak diperbolehkan menjawab dalam bahasa lain, termasuk Inggris. "
        "Gunakan gaya bahasa profesional tetapi tetap ramah. "
        "Jika informasi tidak tersedia dalam dokumen, katakan dengan sopan bahwa Anda tidak tahu. "
        "Pastikan setiap jawaban diberikan secara ringkas, jelas, dan sesuai konteks."
    )
    engine = CondensePlusContextChatEngine.from_defaults(
        retriever=retriever,
        system_prompt=system_prompt,
        verbose=True,
    )

    # Drain the token stream into one string before storing the reply.
    response = engine.stream_chat(message)
    text = "".join(response.response_gen)

    history.append((message, text))
    return history
# =================================== | |
# 7️⃣ Fungsi Utama untuk Menjalankan Aplikasi | |
# =================================== | |
def main():
    """Download the model, build the index and chat engine, and launch the UI.

    The Gradio interface has a single text input and a single text output,
    wired to a callback that returns the chatbot's reply to the submitted
    message.
    """
    model_path = initialize_llama_model()
    initialize_settings(model_path)
    index = initialize_index()
    chat_engine = initialize_chat_engine(index)

    def chatbot_response(message, history=None):
        # gr.Interface passes one positional argument per input component.
        # With a single "text" input, `history` is never supplied, so it must
        # have a default or every submit fails with a TypeError.
        updated_history = generate_response(message, history, chat_engine)
        # generate_response returns the whole (message, reply) list; the
        # "text" output should show only the newest reply.
        return updated_history[-1][1]

    gr.Interface(
        fn=chatbot_response,
        inputs=["text"],
        outputs=["text"],
    ).launch()
# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()