Anusha806 commited on
Commit
4cccee3
·
1 Parent(s): d922025

Added complete LLM Claims API project

Browse files
Dockerfile ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image pinned to the Python version the app targets.
FROM python:3.9

# Run as an unprivileged user (required by Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Install dependencies first so Docker layer caching survives code-only edits.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app

# Port 7860 is the Hugging Face Spaces convention.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # main.py
2
+ import os
3
+ import json
4
+ import uuid
5
+
6
+ from fastapi import FastAPI, UploadFile, File, Form
7
+ from fastapi.responses import JSONResponse
8
+ from dotenv import load_dotenv
9
+ from utils.loader import extract_text_from_pdf
10
+ from utils.evaluator import evaluate
11
+ from utils.parser import parse_query_with_gemini
12
+
13
+ import google.generativeai as genai
14
+
15
# Load environment variables and configure the Gemini client once at startup.
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
# SECURITY: never print the secret itself -- log only whether it is set.
print("Gemini API key configured:", bool(os.getenv("GEMINI_API_KEY")))
app = FastAPI()

# Ensure the scratch directory for uploaded PDFs exists before any request.
os.makedirs("data/documents", exist_ok=True)
23
+
24
@app.get("/")
def root():
    """Health-check endpoint confirming the API is live."""
    return {"message": "LLM Claims API is up and running!"}
27
+
28
@app.post("/evaluate")
async def evaluate_query(query: str = Form(...), file: UploadFile = File(...)):
    """Evaluate an insurance claim query against an uploaded policy PDF.

    Saves the upload under a fresh UUID path, extracts its text, parses the
    query, asks Gemini for a decision, cross-checks with the rule engine,
    and always deletes the temporary file before responding.

    Returns:
        JSONResponse with the merged Gemini + rule-based result, or an
        {"error": ...} payload if any step fails.
    """
    # Save the uploaded file under a collision-free name.
    file_id = str(uuid.uuid4())
    file_path = f"data/documents/{file_id}.pdf"
    with open(file_path, "wb") as f:
        f.write(await file.read())

    try:
        policy_text = extract_text_from_pdf(file_path)

        # parse_query_with_gemini is synchronous today. The old guard
        # inspected the *function object* for __await__, which plain
        # functions never have, so the await branch was dead code.
        # Check the returned value instead: coroutine objects do carry
        # __await__, so a future async implementation still works.
        parsed_query = parse_query_with_gemini(query)
        if hasattr(parsed_query, "__await__"):
            parsed_query = await parsed_query

        gemini_response = await query_gemini(policy_text, query)

        rule_decision = evaluate(parsed_query, gemini_response.get("matched_clause", ""))

        final_result = {
            **gemini_response,
            "parsed_query": parsed_query,
            "rule_based_decision": rule_decision,
        }

    except Exception as e:
        # Surface the failure to the client instead of an opaque 500.
        final_result = {
            "error": str(e)
        }

    finally:
        # Always remove the temporary upload, success or failure.
        if os.path.exists(file_path):
            os.remove(file_path)

    return JSONResponse(content=final_result)
63
+
64
+
65
async def query_gemini(policy_text: str, query_text: str):
    """Ask Gemini to evaluate *query_text* against *policy_text*.

    Returns the model's JSON verdict as a dict; on any error (API failure
    or unparseable output) returns a safe "rejected" fallback dict so the
    endpoint never raises from here.
    """
    model = genai.GenerativeModel("models/gemini-1.5-flash-latest")

    prompt = f"""
    You are an insurance claim evaluator. Based on the policy document and query, respond in JSON with:
    1. decision: 'approved' or 'rejected'
    2. justification: brief explanation
    3. amount: estimated payout
    4. matched_clause: snippet of the policy that supports the decision
    5. similarity_score: float between 0 and 1

    Policy:
    {policy_text}

    Query:
    {query_text}
    """

    try:
        response = model.generate_content(prompt)
        content = response.text.strip()

        # Strip a markdown code fence precisely. The previous global
        # replace("```", "") would also delete backticks appearing inside
        # the JSON payload itself; only peel the fence markers.
        if content.startswith("```"):
            content = content.removeprefix("```json").removeprefix("```")
            content = content.removesuffix("```").strip()

        return json.loads(content)

    except Exception as e:
        # Fail closed: an unreadable model response means no payout.
        return {
            "decision": "rejected",
            "justification": f"Gemini Error: {str(e)}",
            "amount": "₹0",
            "matched_clause": "",
            "similarity_score": 0.0
        }
models/embedder.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from sentence_transformers import SentenceTransformer

# Single shared CPU model; all-MiniLM-L6-v2 produces 384-dim embeddings.
model = SentenceTransformer('all-MiniLM-L6-v2', device='cpu')

def get_embedding(text: str):
    """Return a flat 384-dimension embedding for *text*, or None on failure.

    Callers must treat None as "embedding unavailable" (best-effort API).
    """
    try:
        vec = model.encode(text)
        vec = vec.flatten()
        # Explicit validation: the previous `assert` would be stripped
        # silently under `python -O`.
        if vec.shape[0] != 384:
            raise ValueError(f"Expected embedding of size 384, got {vec.shape[0]}")
        return vec
    except Exception as e:
        print(f"Embedding Error: {e}")
        return None
models/vector_store.py ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ python-multipart
4
+ pypdf
5
+ gradio
6
+ sentence-transformers
7
+ pinecone-client
8
+ pinecone-text
9
+ transformers
10
+ datasets
11
+ torch
12
+ python-dotenv
13
+ pandas
14
+ scikit-learn
15
+ tqdm
16
+ Pillow
17
+ # Gemini LLM client (replaces the OpenAI dependency below)
18
+ google-generativeai
19
+ # Optional: remove this if OpenAI is no longer needed
20
+ # openai
utils/evaluator.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def evaluate(parsed_query: dict, matched_clause: str) -> dict:
    """Rule-based claim decision cross-checking Gemini's matched clause.

    Args:
        parsed_query: fields extracted from the user query; only the
            "procedure" key is consulted here.
        matched_clause: the policy snippet Gemini matched (may be empty).

    Returns:
        dict with "decision" ('approved'/'rejected'), "justification",
        and "amount" keys.
    """
    # NOTE: the previous version also read parsed_query["policy_duration"]
    # into an unused local; dropped until duration rules actually exist.
    procedure = parsed_query.get("procedure", "")

    # Without both a matched clause and a detected procedure there is
    # nothing to reason about -> reject.
    if not matched_clause or not procedure:
        return {
            "decision": "rejected",
            "justification": "Unable to match clause or detect procedure from query.",
            "amount": "₹0"
        }

    # Naive containment check: the procedure name appearing in the clause
    # text is treated as proof of coverage.
    if procedure.lower() in matched_clause.lower():
        return {
            "decision": "approved",
            "justification": f"{procedure.capitalize()} is covered under the policy. Clause matched.",
            "amount": "₹80,000"
        }

    return {
        "decision": "rejected",
        "justification": "Procedure not clearly mentioned in policy document.",
        "amount": "₹0"
    }
utils/loader.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract all page text from a PDF as one whitespace-normalized string.

    Returns "" on any extraction failure (best-effort contract relied on
    by the /evaluate endpoint).
    """
    try:
        with fitz.open(pdf_path) as doc:
            text = "".join(page.get_text() for page in doc)
        # Collapse newlines and runs of spaces into single spaces. The
        # previous replace(" ", " ") was a no-op (space -> space) and
        # never collapsed double spaces as apparently intended.
        return " ".join(text.split())
    except Exception as e:
        print(f"PDF Extraction Error: {e}")
        return ""
13
+
utils/parser.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# parser.py
import json
import google.generativeai as genai
import os
from dotenv import load_dotenv

# Configure Gemini once at import time from the GEMINI_API_KEY env var.
load_dotenv()
genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
9
+
10
def parse_query_with_gemini(query: str):
    """Extract structured claim fields from a natural-language query via Gemini.

    Returns:
        The parsed JSON dict on success, otherwise a dict with "error"
        and "raw_response" keys (never raises).
    """
    model = genai.GenerativeModel("models/gemini-1.5-flash-latest")
    prompt = f"""
    You are an intelligent insurance assistant.
    Given a natural language query, extract the following fields as JSON. Do not include any explanation or extra text — just valid JSON:

    - age (integer)
    - gender (male/female/unknown)
    - procedure (string)
    - location (string)
    - policy_duration_months (integer)

    Query:
    "{query}"
    """
    response = None  # bound before try so the except clause can use it safely
    try:
        response = model.generate_content(prompt)
        response_text = response.text.strip()
        # Strip an optional markdown code fence. The previous
        # replace("json", "") deleted the substring "json" ANYWHERE in the
        # response, corrupting field values that contain it; remove only
        # the fence markers themselves.
        if response_text.startswith("```"):
            response_text = response_text.removeprefix("```json").removeprefix("```")
            response_text = response_text.removesuffix("```").strip()
        return json.loads(response_text)
    except Exception as e:
        return {
            "error": "Failed to parse Gemini response",
            "raw_response": response.text if response is not None else str(e)
        }
utils/retriever.py ADDED
File without changes