amit01Xindus committed on
Commit
c64a3e6
·
verified ·
1 Parent(s): 032cd3f

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +22 -0
  2. app.py +274 -0
  3. requirements.txt +13 -0
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the HTS -> HSN classifier API (app.py served by uvicorn).
FROM python:3.11-slim

WORKDIR /app

# Compiler toolchain (presumably for Python packages that build native code)
# plus curl, which the HEALTHCHECK below invokes.
RUN apt-get update && apt-get install -y \
        gcc \
        g++ \
        make \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Dependencies are installed before the sources are copied so that this layer
# is only rebuilt when requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Port the uvicorn server below listens on.
EXPOSE 7860

# Probe the /health endpoint exposed by app.py.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/health || exit 1

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,274 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import json
4
+ from datetime import datetime
5
+ from fastapi import FastAPI, HTTPException
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from pydantic import BaseModel
8
+ import uvicorn
9
+ import requests
10
+
11
# FastAPI application object; the route decorators in this module attach to it.
app = FastAPI(title="HTS to HSN Classifier", version="1.0.0")

# CORS is wide open: every origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive -- confirm this is intended before exposing the API publicly.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
20
+
21
# Request body accepted by POST /classify.
# (Documented with comments rather than a docstring so the generated schema
# stays exactly as it was.)
class ClassificationRequest(BaseModel):
    # Either an HTS code or a free-text product description; the endpoint
    # hands this string to map_hts_to_hsn unchanged.
    hts_code_or_desc: str
23
+
24
# Response body returned by POST /classify.
# (Documented with comments rather than a docstring so the generated schema
# stays exactly as it was.)
class ClassificationResponse(BaseModel):
    # Matched HSN code, or None when nothing could be matched.
    HSN_Code: str | None
    # Description belonging to the matched HSN code, or None without a match.
    HSN_Description: str | None
    # Qualitative confidence label; map_hts_to_hsn emits "High", "Medium" or
    # "Low" itself and may pass through whatever label the LLM returned.
    Confidence: str
    # Human-readable trail of the lookup steps that produced the result.
    Reasoning: str
29
+
30
+ class HuggingFaceInferenceClient:
31
+ def __init__(self, model_name: str = "meta-llama/Meta-Llama-3-8B-Instruct", api_token: str = None):
32
+ self.model_name = model_name
33
+ self.api_token = api_token or ""
34
+ if not self.api_token:
35
+ raise ValueError("Hugging Face API token not provided")
36
+ self.headers = {
37
+ "Authorization": f"Bearer {self.api_token}",
38
+ "Content-Type": "application/json"
39
+ }
40
+ # Fixed API URL - use the correct inference endpoint
41
+ self.api_url = f"https://api-inference.huggingface.co/models/{self.model_name}"
42
+
43
+ def invoke(self, prompt: str) -> str:
44
+ # Fixed payload structure for Hugging Face Inference API
45
+ payload = {
46
+ "inputs": prompt,
47
+ "parameters": {
48
+ "max_new_tokens": 500,
49
+ "temperature": 0.6,
50
+ "top_p": 0.95,
51
+ "return_full_text": False
52
+ }
53
+ }
54
+
55
+ try:
56
+ response = requests.post(self.api_url, json=payload, headers=self.headers, timeout=60)
57
+ response.raise_for_status()
58
+ data = response.json()
59
+
60
+ # Handle different response formats
61
+ if isinstance(data, list) and len(data) > 0:
62
+ if "generated_text" in data[0]:
63
+ return data[0]["generated_text"]
64
+ elif "text" in data[0]:
65
+ return data[0]["text"]
66
+ elif isinstance(data, dict):
67
+ if "generated_text" in data:
68
+ return data["generated_text"]
69
+ elif "text" in data:
70
+ return data["text"]
71
+
72
+ return str(data)
73
+
74
+ except requests.exceptions.RequestException as e:
75
+ print(f"API request failed: {e}")
76
+ raise Exception(f"Hugging Face API error: {e}")
77
+ except json.JSONDecodeError as e:
78
+ print(f"JSON decode error: {e}")
79
+ raise Exception(f"Invalid JSON response from API: {e}")
80
+
81
+
82
# NOTE(review): newer FastAPI releases document lifespan handlers as the
# replacement for on_event -- consider migrating when the framework is upgraded.
@app.on_event("startup")
async def startup_event():
    """Load the HTS/HSN reference tables and create the LLM client.

    Populates the module-level globals used by the request handlers:
    ``df_hts``/``df_hsn`` (the two spreadsheets with whitespace-stripped
    column names), the four ``*_col`` column-name constants, and
    ``llm_client``.

    Raises:
        Exception: Any failure (missing file, missing column, missing API
            token, ...) is logged and re-raised so the application does not
            start in a half-initialised state.
    """
    global df_hts, df_hsn, llm_client
    global hts_code_col, hts_desc_col, hsn_code_col, hsn_desc_col

    hts_path = "data/Htsdata.xlsx"
    hsn_path = "data/HSN_SAC.xlsx"

    try:
        hts_code_col = "HTS Number"
        hts_desc_col = "Description"
        hsn_code_col = "HSN_CD"
        hsn_desc_col = "HSN_Description"

        print("Loading HTS data from:", hts_path)
        df_hts = pd.read_excel(hts_path)
        df_hts.columns = df_hts.columns.str.strip()

        print("Loading HSN data from:", hsn_path)
        df_hsn = pd.read_excel(hsn_path)
        df_hsn.columns = df_hsn.columns.str.strip()
        # Codes are compared as text (equality / prefix match) at request time.
        # NOTE(review): if the sheet stores codes as numbers, leading zeros
        # are already lost at this point -- verify against the data file.
        df_hsn[hsn_code_col] = df_hsn[hsn_code_col].astype(str)

        llm_client = HuggingFaceInferenceClient(model_name="meta-llama/Meta-Llama-3-8B-Instruct")

        print("✅ Application started successfully!")

    except Exception as e:
        print(f"❌ Startup error: {e}")
        # Bare raise keeps the original traceback intact.
        raise
113
+
114
@app.get("/health")
async def health_check():
    """Liveness probe: report a fixed "healthy" status plus the current local time."""
    checked_at = datetime.now().isoformat()
    return {"status": "healthy", "timestamp": checked_at}
117
+
118
def extract_structure(code: str):
    """Split a tariff code into the digit prefixes used for lookups.

    Every non-digit character is discarded first, so punctuated input such as
    ``"8471.30.01"`` is accepted.

    Returns:
        A dict with
        ``"chapter"`` - first 4 digits, or ``None`` if fewer are available;
        ``"heading"`` - first 6 digits, or ``None`` if fewer are available;
        ``"hsn8"``    - first 8 digits, or all digits when there are fewer;
        ``"full"``    - all digits.
    """
    digits = "".join(ch for ch in str(code) if ch.isdigit())

    def prefix(width):
        # A prefix only exists when the code is at least that long.
        return digits[:width] if len(digits) >= width else None

    eight = prefix(8)
    return {
        "chapter": prefix(4),
        "heading": prefix(6),
        "hsn8": digits if eight is None else eight,
        "full": digits,
    }
126
+
127
+ def extract_json_from_text(text: str) -> dict:
128
+ """Extract JSON from text response, handling various formats."""
129
+ text = text.strip()
130
+
131
+ # Find JSON content between braces
132
+ start_idx = text.find('{')
133
+ end_idx = text.rfind('}')
134
+
135
+ if start_idx != -1 and end_idx != -1 and start_idx < end_idx:
136
+ json_str = text[start_idx:end_idx + 1]
137
+ try:
138
+ return json.loads(json_str)
139
+ except json.JSONDecodeError:
140
+ pass
141
+
142
+ # If JSON extraction fails, try to parse key-value pairs
143
+ try:
144
+ lines = text.split('\n')
145
+ result = {}
146
+ for line in lines:
147
+ if ':' in line:
148
+ key, value = line.split(':', 1)
149
+ key = key.strip().strip('"\'')
150
+ value = value.strip().strip('",\'')
151
+ if key in ['HSN_Code', 'HSN_Description', 'Confidence', 'Reasoning']:
152
+ result[key] = value
153
+
154
+ if len(result) >= 2: # At least some keys found
155
+ return result
156
+ except:
157
+ pass
158
+
159
+ return None
160
+
161
def _text_or_none(value):
    """Return *value* as ``str``; map missing values (None/NaN/NA) to ``None``."""
    if value is None or value is pd.NA:
        return None
    if isinstance(value, float) and value != value:  # NaN is the only float != itself
        return None
    return str(value)


def _ask_llm_for_hsn(struct, hts_desc_text, fallback_heading):
    """Ask the LLM to refine a heading-level match into a full HSN result.

    Returns a response dict, or ``None`` when the call fails or the reply
    lacks the ``HSN_Code`` / ``HSN_Description`` keys.
    """
    system_prompt = "You are an expert in Indian HSN classification. Respond only with valid JSON containing the keys: HSN_Code, HSN_Description, Confidence, Reasoning."

    user_prompt = "\n".join((
        "",
        f"Input HTS code: {struct['full']}",
        f"HTS Description: {hts_desc_text}",
        f"Fallback HSN heading: {fallback_heading[hsn_code_col]} - {fallback_heading[hsn_desc_col]}",
        "",
        "Based on this information, provide the most appropriate 8-digit HSN code and description.",
        "",
        "Required JSON format:",
        "{",
        '"HSN_Code": "XXXXXXXX",',
        '"HSN_Description": "description here",',
        '"Confidence": "High/Medium/Low",',
        '"Reasoning": "explanation here"',
        "}",
        "",
    ))

    # Prompt wrapped in the header/eot special tokens used for the Llama 3 model.
    full_prompt = f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n{system_prompt}<|eot_id|><|start_header_id|>user<|end_header_id|>\n{user_prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n"

    try:
        llm_response = llm_client.invoke(full_prompt).strip()
        print(f"LLM Response: {llm_response}")  # Debug logging
        parsed_response = extract_json_from_text(llm_response)
    except Exception as e:
        # Best effort: any LLM problem falls back to the heading-level match.
        print(f"LLM failed: {e}")
        return None

    if not isinstance(parsed_response, dict) or not all(
        key in parsed_response for key in ("HSN_Code", "HSN_Description")
    ):
        print(f"Invalid LLM response format: {parsed_response}")
        return None

    # Coerce to text: the model may emit numbers or nulls, which the response
    # schema (string fields) would reject.
    return {
        "HSN_Code": _text_or_none(parsed_response.get("HSN_Code")),
        "HSN_Description": _text_or_none(parsed_response.get("HSN_Description")),
        "Confidence": _text_or_none(parsed_response.get("Confidence")) or "Medium",
        "Reasoning": _text_or_none(parsed_response.get("Reasoning")) or "LLM classification",
    }


def map_hts_to_hsn(hts_code_or_desc: str):
    """Map an HTS code to an HSN code using the tables loaded at startup.

    Lookup order for numeric input (dots, dashes and spaces are ignored, so
    ``"8471.30.01"`` is treated like ``"84713001"``):

    1. exact match of the first 8 digits (or of the whole code when it is
       shorter) against the HSN table -> confidence "High";
    2. HSN codes starting with the first 6 digits -> the LLM is asked to pick
       an 8-digit code; if it fails, the first such HSN row is returned with
       confidence "Medium";
    3. HSN codes starting with the first 4 digits -> first row, "Low";
    4. otherwise no match.

    Steps that need more digits than the input provides are skipped.
    Non-numeric input is treated as a description, which this deployment does
    not support.

    Args:
        hts_code_or_desc: HTS code (optionally punctuated) or free text.

    Returns:
        Dict with keys ``HSN_Code``, ``HSN_Description``, ``Confidence`` and
        ``Reasoning``; the first two are ``None`` when nothing matched.
    """
    raw_input = str(hts_code_or_desc).strip()
    compact = "".join(ch for ch in raw_input if ch not in " .-")

    if not (compact.isascii() and compact.isdigit()):
        return {
            "HSN_Code": None,
            "HSN_Description": None,
            "Confidence": "Low",
            "Reasoning": "Description search not available in this deployment.",
        }

    struct = extract_structure(compact)
    chapter, heading, hsn8 = struct["chapter"], struct["heading"], struct["hsn8"]
    reasoning_parts = [f"Input HTS code: {struct['full']}"]

    # Context for the reasoning text / LLM prompt: up to three HTS descriptions
    # sharing the 4-digit prefix. Missing descriptions are skipped.
    if chapter is not None:
        hts_codes = df_hts[hts_code_col].astype(str)
        hts_match = df_hts[hts_codes.str.startswith(chapter)]
        hts_desc_list = hts_match[hts_desc_col].dropna().astype(str).head(3).tolist()
        hts_desc_text = "; ".join(hts_desc_list) if hts_desc_list else "No HTS description found."
        reasoning_parts.append(f"HTS Chapter {chapter}: {hts_desc_text}")
    else:
        hts_desc_text = "No HTS description found."
        reasoning_parts.append("HTS code has fewer than 4 digits; HTS description lookup skipped.")

    # astype(str) keeps the prefix masks purely boolean even if a code cell is missing.
    hsn_codes = df_hsn[hsn_code_col].astype(str)

    hsn_match = df_hsn[hsn_codes == hsn8]
    if not hsn_match.empty:
        best_match = hsn_match.iloc[0]
        reasoning_parts.append(f"Exact {len(hsn8)}-digit HSN {hsn8} found.")
        return {
            "HSN_Code": _text_or_none(best_match[hsn_code_col]),
            "HSN_Description": _text_or_none(best_match[hsn_desc_col]),
            "Confidence": "High",
            "Reasoning": " ".join(reasoning_parts),
        }

    if heading is not None:
        heading_match = df_hsn[hsn_codes.str.startswith(heading)]
        if not heading_match.empty:
            fallback_heading = heading_match.iloc[0]
            reasoning_parts.append(f"No exact 8-digit HSN. Fallback heading {heading} found.")

            llm_result = _ask_llm_for_hsn(struct, hts_desc_text, fallback_heading)
            if llm_result is not None:
                return llm_result

            return {
                "HSN_Code": _text_or_none(fallback_heading[hsn_code_col]),
                "HSN_Description": _text_or_none(fallback_heading[hsn_desc_col]),
                "Confidence": "Medium",
                "Reasoning": " ".join(reasoning_parts) + " LLM failed, using fallback 6-digit heading.",
            }

    if chapter is not None:
        chapter_match = df_hsn[hsn_codes.str.startswith(chapter)]
        if not chapter_match.empty:
            best_match = chapter_match.iloc[0]
            reasoning_parts.append(f"No heading match. Fallback to chapter {chapter}.")
            return {
                "HSN_Code": _text_or_none(best_match[hsn_code_col]),
                "HSN_Description": _text_or_none(best_match[hsn_desc_col]),
                "Confidence": "Low",
                "Reasoning": " ".join(reasoning_parts),
            }

    return {
        "HSN_Code": None,
        "HSN_Description": None,
        "Confidence": "Low",
        "Reasoning": " ".join(reasoning_parts) + " No HSN match found.",
    }
260
+
261
@app.post("/classify", response_model=ClassificationResponse)
async def classify_hts(request: ClassificationRequest):
    """Classify the submitted HTS code (or description) into an HSN code.

    Args:
        request: Body carrying ``hts_code_or_desc``.

    Returns:
        The dict produced by ``map_hts_to_hsn``, validated against
        ``ClassificationResponse``.

    Raises:
        HTTPException: Status 500 with the error text if classification fails.
    """
    import asyncio  # local import keeps this handler self-contained

    try:
        # map_hts_to_hsn is synchronous and may perform a blocking HTTP call
        # to the LLM (60 s timeout). Running it in a worker thread keeps the
        # event loop free, so /health and other requests are still served.
        return await asyncio.to_thread(map_hts_to_hsn, request.hts_code_or_desc)
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Classification error: {str(e)}") from e
268
+
269
@app.get("/")
async def root():
    """Landing endpoint: name the service and report that it is running."""
    banner = dict(message="HTS to HSN Classification API", status="running")
    return banner
272
+
273
# Direct-execution entry point (python app.py) with auto-reload enabled;
# the Dockerfile CMD starts uvicorn itself and does not go through here.
if __name__ == "__main__":
    dev_server_options = {"host": "0.0.0.0", "port": 7860, "reload": True}
    uvicorn.run("app:app", **dev_server_options)
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# --- Web API (imported by app.py) ---
fastapi==0.104.1
uvicorn[standard]==0.24.0
pydantic==2.5.0
requests==2.31.0

# --- Spreadsheet loading (pandas is imported by app.py; openpyxl is the
# engine pandas uses for .xlsx files) ---
pandas==2.0.3
openpyxl==3.1.2
numpy==1.24.3

# --- NOTE(review): not imported by app.py as shown in this commit; confirm
# nothing else needs them before removing (torch in particular is large) ---
faiss-cpu==1.7.4
langchain-community==0.0.34
python-dotenv==1.0.1
httpx==0.27.0
transformers==4.40.0
torch==2.3.0