DeryFerd committed (verified)
Commit da10bc3 · 1 Parent(s): cbadf24

Update app.py

Files changed (1): app.py (+110, -101)
app.py CHANGED
@@ -1,13 +1,9 @@
-# main.py (definitive final version - with smart parsing & a precise prompt)
 import os
-import shutil
 import re
-import uvicorn
-from fastapi import FastAPI, UploadFile, File, HTTPException
-from fastapi.middleware.cors import CORSMiddleware
-from pydantic import BaseModel
-import torch
-
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
@@ -15,128 +11,141 @@ from langchain_community.vectorstores import FAISS
 from langchain_community.retrievers import BM25Retriever
 from langchain.retrievers import EnsembleRetriever
 from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch._dynamo as dynamo
-
-dynamo.config.automatic_dynamic_shapes = False
-dynamo.config.assume_static_by_default = True
-
-UPLOAD_DIR = "temp_uploads"
-os.makedirs(UPLOAD_DIR, exist_ok=True)
-
-app = FastAPI(title="Financial RAG Chatbot API")
-
-origins = ["*"]
-app.add_middleware(CORSMiddleware, allow_origins=origins, allow_credentials=True, allow_methods=["*"], allow_headers=["*"])
-
-rag_pipeline = {"retriever": None, "llm": None, "tokenizer": None, "embeddings": None, "all_chunks": None}

-def setup_rag_pipeline():
-    print("Starting RAG pipeline setup...")
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Using device: {device}")
-    print("Loading embedding model...")
-    rag_pipeline["embeddings"] = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={'device': device})
-    print("Loading LLM (Gemma 270M)...")
-    rag_pipeline["tokenizer"] = AutoTokenizer.from_pretrained("google/gemma-3-270m-it")
-    rag_pipeline["llm"] = AutoModelForCausalLM.from_pretrained("google/gemma-3-270m-it", device_map="auto", torch_dtype=torch.bfloat16)
-    print("RAG pipeline setup finished.")
-
-@app.on_event("startup")
-async def startup_event():
-    setup_rag_pipeline()
-
-@app.post("/upload")
-async def upload_document(file: UploadFile = File(...)):
-    # This function stays the same
     try:
-        file_path = os.path.join(UPLOAD_DIR, file.filename)
-        with open(file_path, "wb") as buffer: shutil.copyfileobj(file.file, buffer)
         loader = PyPDFLoader(file_path)
         docs = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
         chunks = text_splitter.split_documents(docs)
         rag_pipeline["all_chunks"] = chunks
-        faiss_db = FAISS.from_documents(chunks, rag_pipeline["embeddings"])
         faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 10})
         bm25_retriever = BM25Retriever.from_documents(chunks)
         bm25_retriever.k = 10
-        rag_pipeline["retriever"] = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])
-        return {"status": "success", "message": f"Document '{file.filename}' was processed successfully."}
-    except Exception as e: raise HTTPException(status_code=500, detail=str(e))
-    finally:
-        if 'file_path' in locals() and os.path.exists(file_path): os.remove(file_path)

-class ChatRequest(BaseModel):
-    query: str

-@app.post("/chat")
-async def chat_with_doc(request: ChatRequest):
-    if not rag_pipeline.get("retriever"):
-        raise HTTPException(status_code=400, detail="No document has been uploaded yet.")

-    query_original = request.query
     query_lower = query_original.lower()
-    clean_context = ""
-    found_source = ""
-
     priority_keywords = ["jumlah aset lancar"]
     use_smart_lane = any(keyword in query_lower for keyword in priority_keywords)

     if use_smart_lane:
-        print("Smart lane active! Searching context & parsing the year...")
         year_match = re.search(r'\b(202[3-4])\b', query_lower)
         target_year = year_match.group(1) if year_match else "2024"
-
-        all_chunks = rag_pipeline.get("all_chunks", [])
-        for chunk in all_chunks:
             lines = chunk.page_content.split('\n')
             for line in lines:
                 if any(keyword in line.lower() for keyword in priority_keywords):
-                    # --- [FIX #1] SMART PARSING WITH REGEX ---
-                    # Find every number formatted with comma/period thousands separators
                     numbers = re.findall(r'(\d{1,3}(?:[.,]\d{3})*)', line)
                     if len(numbers) >= 2:
-                        try:
-                            # Assume the first number is 2024, the second 2023
-                            value_2024 = numbers[0]
-                            value_2023 = numbers[1]
-                            value = value_2024 if target_year == "2024" else value_2023
-                            # Build a clean fact for the LLM
-                            clean_context = f"Fact: jumlah aset lancar (total current assets) for {target_year} is {value}."
-                            found_source = f"Page {chunk.metadata.get('page', 'NA')}"
-                            break
-                        except (IndexError, ValueError):
-                            continue
-            if clean_context:
-                break

-    if not clean_context:
-        print("Using the normal lane (hybrid search)...")
         retrieved_docs = rag_pipeline["retriever"].invoke(query_original)
         clean_context = "\n\n".join([doc.page_content for doc in retrieved_docs[:3]])
         found_source = ", ".join(list(set([f"Page {doc.metadata.get('page', 'NA')}" for doc in retrieved_docs[:3]])))
-
-    # --- Generation ---
-    tokenizer = rag_pipeline["tokenizer"]
-    model = rag_pipeline["llm"]
-
-    # --- [FIX #2] A VERY TO-THE-POINT PROMPT ---
-    chat_template = [
-        {"role": "user", "content": f"Use this information: '{clean_context}'. Answer this question: '{query_original}'"}
-    ]
-    final_prompt = tokenizer.apply_chat_template(chat_template, tokenize=False, add_generation_prompt=True)
-    inputs = tokenizer(final_prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=150, do_sample=False, pad_token_id=tokenizer.eos_token_id)

-    input_length = inputs.input_ids.shape[1]
-    generated_tokens = outputs[0, input_length:]
-    final_answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
-
-    return {"answer": final_answer.strip(), "source": found_source}
-
-@app.get("/")
-def read_root():
-    return {"message": "Welcome to the Financial RAG Chatbot API!"}
-
 if __name__ == "__main__":
-    uvicorn.run(app, host="0.0.0.0", port=8000)
 
+# app.py (final version for Gradio on Hugging Face)
+
+import gradio as gr
 import os
 import re
+import shutil
 from langchain_community.document_loaders import PyPDFLoader
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.retrievers import BM25Retriever
 from langchain.retrievers import EnsembleRetriever
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch

+# --- 1. MODEL SETUP (runs once when the app starts) ---
+@torch.no_grad()
+def load_models():
+    print("Loading models (this happens only once)...")
     device = "cuda" if torch.cuda.is_available() else "cpu"
+
+    cache_dir = "./model_cache"
+    os.makedirs(cache_dir, exist_ok=True)
+    os.environ['SENTENCE_TRANSFORMERS_HOME'] = cache_dir
+
+    embeddings = HuggingFaceEmbeddings(
+        model_name="sentence-transformers/all-MiniLM-L6-v2",
+        cache_folder=cache_dir
+    )
+
+    # Use the token from the Space secrets, if one is set
+    hf_token = os.getenv("HUGGING_FACE_HUB_TOKEN")
+
+    tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-270m-it", cache_dir=cache_dir, token=hf_token)
+    llm = AutoModelForCausalLM.from_pretrained(
+        "google/gemma-3-270m-it",
+        cache_dir=cache_dir,
+        device_map="auto",
+        torch_dtype=torch.bfloat16,
+        token=hf_token
+    )
+    print("Models loaded successfully.")
+    return embeddings, tokenizer, llm
+
+embeddings, tokenizer, llm = load_models()
+# Initialize global state for the retriever and chunks
+rag_pipeline = {"retriever": None, "all_chunks": None}
+
+
+# --- 2. CORE RAG FUNCTIONS (backend logic) ---
+def process_document(uploaded_file):
+    if uploaded_file is None:
+        return "Please upload a file first.", gr.update(interactive=False)
+
     try:
+        # Gradio stores uploads at a temporary path, so the file can be used directly
+        file_path = uploaded_file.name
+
         loader = PyPDFLoader(file_path)
         docs = loader.load()
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
         chunks = text_splitter.split_documents(docs)
+
         rag_pipeline["all_chunks"] = chunks
+
+        faiss_db = FAISS.from_documents(chunks, embeddings)
         faiss_retriever = faiss_db.as_retriever(search_kwargs={"k": 10})
         bm25_retriever = BM25Retriever.from_documents(chunks)
         bm25_retriever.k = 10
+
+        rag_pipeline["retriever"] = EnsembleRetriever(
+            retrievers=[bm25_retriever, faiss_retriever],
+            weights=[0.5, 0.5]
+        )
+
+        return f"File '{os.path.basename(file_path)}' processed successfully! You can now ask questions.", gr.update(interactive=True)
+    except Exception as e:
+        return f"Error while processing the file: {str(e)}", gr.update(interactive=False)

+def get_rag_response(query, chat_history):
+    if rag_pipeline["retriever"] is None:
+        # Return a (textbox, history) pair so the outputs wired below stay consistent
+        chat_history.append((query, "No document has been processed yet. Please upload a file first."))
+        return "", chat_history

+    query_original = query
     query_lower = query_original.lower()
+    final_answer = ""
+    found_source = "No specific source"
+
     priority_keywords = ["jumlah aset lancar"]
     use_smart_lane = any(keyword in query_lower for keyword in priority_keywords)

     if use_smart_lane:
+        # Smart lane: look the figure up directly instead of generating it
         year_match = re.search(r'\b(202[3-4])\b', query_lower)
         target_year = year_match.group(1) if year_match else "2024"
+        for chunk in rag_pipeline["all_chunks"]:
             lines = chunk.page_content.split('\n')
             for line in lines:
                 if any(keyword in line.lower() for keyword in priority_keywords):
                     numbers = re.findall(r'(\d{1,3}(?:[.,]\d{3})*)', line)
                     if len(numbers) >= 2:
+                        value_2024 = numbers[0]
+                        value_2023 = numbers[1]
+                        value = value_2024 if target_year == "2024" else value_2023
+                        final_answer = f"Jumlah aset lancar (total current assets) for {target_year} is **{value}**."
+                        found_source = f"Source: page {chunk.metadata.get('page', 'NA')}"
+                        break
+            if final_answer: break

+    if not final_answer:
+        # Normal lane: hybrid retrieval plus LLM generation
         retrieved_docs = rag_pipeline["retriever"].invoke(query_original)
         clean_context = "\n\n".join([doc.page_content for doc in retrieved_docs[:3]])
         found_source = ", ".join(list(set([f"Page {doc.metadata.get('page', 'NA')}" for doc in retrieved_docs[:3]])))
+
+        chat_template = [
+            {"role": "system", "content": "You are a meticulous financial-analyst AI. Answer questions using only the text provided."},
+            {"role": "user", "content": f"From the TEXT below, find the answer to the question '{query_original}'.\n\nTEXT:\n{clean_context}\n\nANSWER:"}
+        ]
+        final_prompt = tokenizer.apply_chat_template(chat_template, tokenize=False, add_generation_prompt=True)
+        inputs = tokenizer(final_prompt, return_tensors="pt").to(llm.device)
+        outputs = llm.generate(**inputs, max_new_tokens=250, do_sample=False, pad_token_id=tokenizer.eos_token_id)
+        input_length = inputs.input_ids.shape[1]
+        generated_tokens = outputs[0, input_length:]
+        final_answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
+
+    full_response = f"{final_answer}\n\n*{found_source}*"
+    chat_history.append((query, full_response))
+    return "", chat_history
+
+
+# --- 3. BUILD THE UI WITH GRADIO ---
+with gr.Blocks() as demo:
+    gr.Markdown("# 📊 Financial RAG Chatbot")

+    with gr.Row():
+        with gr.Column(scale=1):
+            file_output = gr.Textbox(label="Document Status", interactive=False)
+            upload_button = gr.UploadButton("Click to Upload a PDF", file_types=[".pdf"])
+            ask_button = gr.Button("Ask", interactive=False)
+
+        with gr.Column(scale=4):
+            chatbot = gr.Chatbot(label="Chat")
+            msg = gr.Textbox(label="Type your question here...")
+
+    # Wire the UI events to the functions
+    upload_button.upload(process_document, upload_button, [file_output, ask_button])
+    msg.submit(get_rag_response, [msg, chatbot], [msg, chatbot])
+    ask_button.click(get_rag_response, [msg, chatbot], [msg, chatbot])
+
+# --- 4. RUN THE APP ---
 if __name__ == "__main__":
+    demo.launch()
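
The retrieval setup is unchanged between the two versions: BM25 (lexical) and FAISS (vector) each return their top 10, and EnsembleRetriever fuses the two ranked lists with weighted Reciprocal Rank Fusion. A standalone sketch of that pattern on toy documents — the rows and query below are made up, and it reuses the `embeddings` instance returned by load_models():

    from langchain_core.documents import Document
    from langchain_community.vectorstores import FAISS
    from langchain_community.retrievers import BM25Retriever
    from langchain.retrievers import EnsembleRetriever

    # Toy documents, not real statement data
    docs = [Document(page_content="Jumlah aset lancar 132.599 121.677"),
            Document(page_content="Jumlah liabilitas jangka pendek 98.000 91.500")]

    bm25 = BM25Retriever.from_documents(docs)
    bm25.k = 1
    faiss_ret = FAISS.from_documents(docs, embeddings).as_retriever(search_kwargs={"k": 1})
    hybrid = EnsembleRetriever(retrievers=[bm25, faiss_ret], weights=[0.5, 0.5])

    print(hybrid.invoke("jumlah aset lancar")[0].page_content)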
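The smart lane stands or falls on the thousands-separator regex pulling exactly the two year columns out of a matching row. A quick sanity check on a made-up line in the shape the code assumes (2024 figure first, then 2023):

    import re

    line = "Jumlah aset lancar 132.599 121.677"  # hypothetical row; real values come from the PDF
    numbers = re.findall(r'(\d{1,3}(?:[.,]\d{3})*)', line)
    print(numbers)  # ['132.599', '121.677'] -> numbers[0] is treated as 2024, numbers[1] as 2023

Note that the pattern also matches bare 1-3 digit runs, so a stray year such as 2024 in the same row would come back as '202' and '4' and shift the column mapping; the first-figure-is-2024 assumption only holds for rows shaped like the sample.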
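Both msg.submit(...) and ask_button.click(...) rely on Gradio's contract that a handler returns one value per component in its outputs list, which is why get_rag_response ends with return "", chat_history (clear the textbox, refresh the chat) and why its early-exit path must do the same. A minimal sketch of that wiring with a hypothetical echo handler, using the same tuple-style chat history as the commit:

    import gradio as gr

    def respond(message, history):
        # One return value per output component: the Textbox and the Chatbot
        history.append((message, f"Echo: {message}"))
        return "", history

    with gr.Blocks() as demo:
        chatbot = gr.Chatbot()
        msg = gr.Textbox()
        msg.submit(respond, [msg, chatbot], [msg, chatbot])

    demo.launch()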