Spaces:
Sleeping
Sleeping
Commit
·
a1c0d1f
1
Parent(s):
6548ed5
Add internal ollama parsing to citizenship ocr
Browse files- Dockerfile +1 -0
- data_models.py +17 -0
- main.py +3 -4
- requirements.txt +2 -2
- utils.py +47 -2
Dockerfile
CHANGED
@@ -57,6 +57,7 @@ ENV DOCTR_CACHE_DIR=/app/.cache/doctr
|
|
57 |
# Copy the application code
|
58 |
COPY main.py .
|
59 |
COPY utils.py ./
|
|
|
60 |
COPY models /app/models
|
61 |
|
62 |
# Expose the port FastAPI will run on
|
|
|
57 |
# Copy the application code
|
58 |
COPY main.py .
|
59 |
COPY utils.py ./
|
60 |
+
COPY data_models.py ./
|
61 |
COPY models /app/models
|
62 |
|
63 |
# Expose the port FastAPI will run on
|
data_models.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel
|
2 |
+
|
3 |
+
# Structured fields parsed out of the OCR text of a citizenship card.
# NOTE: documented with comments rather than a class docstring on purpose:
# pydantic includes a model's docstring in its generated JSON schema, and
# that schema is sent verbatim to the parsing model.
# Every field is a required string.
class Citizenship(BaseModel):
    # --- card holder ---
    citizenship_no: str
    name: str
    gender: str
    date_of_birth: str

    # --- addresses (district + nagarpalika, presumably the municipality) ---
    permanent_district: str
    permanent_nagarpalika: str
    temporary_district: str
    temporary_nagarpalika: str

    # --- relatives listed on the card ---
    father_name: str
    father_address: str
    mother_name: str
    mother_address: str
    wife_name: str
    wife_address: str
|
main.py
CHANGED
@@ -71,7 +71,7 @@ from pydantic import BaseModel
|
|
71 |
import shutil
|
72 |
|
73 |
# Import from optimized utils
|
74 |
-
from utils import dev_number, roman_number, dev_letter, roman_letter, predict_ne,
|
75 |
|
76 |
app = FastAPI(
|
77 |
title="OCR API",
|
@@ -198,10 +198,9 @@ async def classify_ne(image: UploadFile = File(...)):
|
|
198 |
async def ocr_citizenship(image: UploadFile = File(...)):
|
199 |
"""OCR the provided Nepali Citizenship card"""
|
200 |
image_path = await save_upload_file_tmp(image)
|
201 |
-
prediction =
|
202 |
image_path=image_path,
|
203 |
-
)
|
204 |
-
|
205 |
return JSONResponse(content=prediction)
|
206 |
# Health check endpoint
|
207 |
@app.get("/health")
|
|
|
71 |
import shutil
|
72 |
|
73 |
# Import from optimized utils
|
74 |
+
from utils import dev_number, roman_number, dev_letter, roman_letter, predict_ne, perform_citizenship_ocr
|
75 |
|
76 |
app = FastAPI(
|
77 |
title="OCR API",
|
|
|
198 |
async def ocr_citizenship(image: UploadFile = File(...)):
|
199 |
"""OCR the provided Nepali Citizenship card"""
|
200 |
image_path = await save_upload_file_tmp(image)
|
201 |
+
prediction = perform_citizenship_ocr(
|
202 |
image_path=image_path,
|
203 |
+
)
|
|
|
204 |
return JSONResponse(content=prediction)
|
205 |
# Health check endpoint
|
206 |
@app.get("/health")
|
requirements.txt
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
python-doctr[torch,viz]
|
2 |
torch
|
3 |
torchvision
|
4 |
numpy
|
@@ -9,5 +9,5 @@ uvicorn
|
|
9 |
pydantic
|
10 |
python-multipart
|
11 |
scikit-learn==1.6.1
|
12 |
-
opencv-python
|
13 |
surya-ocr==0.13.1
|
|
|
1 |
+
python-doctr[torch,viz]==0.11.0
|
2 |
torch
|
3 |
torchvision
|
4 |
numpy
|
|
|
9 |
pydantic
|
10 |
python-multipart
|
11 |
scikit-learn==1.6.1
|
12 |
+
opencv-python==4.11.0.86
|
13 |
surya-ocr==0.13.1
|
utils.py
CHANGED
@@ -6,12 +6,16 @@ from PIL import Image
|
|
6 |
# from functools import lru_cache
|
7 |
from torchvision import models
|
8 |
from typing import List
|
|
|
|
|
|
|
9 |
import torchvision.transforms as transforms
|
10 |
import torch
|
11 |
import torch.nn as nn
|
12 |
import numpy as np
|
13 |
import cv2
|
14 |
import regex as re
|
|
|
15 |
# import os
|
16 |
import pickle
|
17 |
|
@@ -323,7 +327,7 @@ def merge_boxes_same_line(boxes, y_thresh=5, x_thresh=60):
|
|
323 |
|
324 |
return np.array(merged)
|
325 |
|
326 |
-
def
|
327 |
doctr_detector, surya_recognition_predictor, surya_detection_predictor = initialize_detector()
|
328 |
page = cv2.imread(image_path)
|
329 |
page = cv2.convertScaleAbs(page, alpha=1.5, beta=0)
|
@@ -368,4 +372,45 @@ def ocr_citizenship_utils(image_path: str) -> List[List[str]]:
|
|
368 |
line_result.append(text_combo)
|
369 |
prev_y = boxes[1]
|
370 |
|
371 |
-
return full_result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
# from functools import lru_cache
|
7 |
from torchvision import models
|
8 |
from typing import List
|
9 |
+
from fastapi import HTTPException
|
10 |
+
from data_models import Citizenship
|
11 |
+
import json
|
12 |
import torchvision.transforms as transforms
|
13 |
import torch
|
14 |
import torch.nn as nn
|
15 |
import numpy as np
|
16 |
import cv2
|
17 |
import regex as re
|
18 |
+
import requests
|
19 |
# import os
|
20 |
import pickle
|
21 |
|
|
|
327 |
|
328 |
return np.array(merged)
|
329 |
|
330 |
+
def ocr_citizenship(image_path: str) -> List[List[str]]:
|
331 |
doctr_detector, surya_recognition_predictor, surya_detection_predictor = initialize_detector()
|
332 |
page = cv2.imread(image_path)
|
333 |
page = cv2.convertScaleAbs(page, alpha=1.5, beta=0)
|
|
|
372 |
line_result.append(text_combo)
|
373 |
prev_y = boxes[1]
|
374 |
|
375 |
+
return full_result
|
376 |
+
|
377 |
+
PARSE_PROMPT = "You are a parsing agent. Your task is to generate a json response from the given text corpus."
|
378 |
+
def create_local_model(
    message,
    base_model,
    *,
    model="aisingapore/Llama-SEA-LION-v3-8B-IT",
    timeout=120,
):
    """Send a chat request to the internal Ollama proxy and return the decoded reply.

    Args:
        message: Chat messages forwarded as the ``messages`` field of the request.
        base_model: Pydantic model class; its ``model_json_schema()`` is sent as
            the ``format`` field of the request.
        model: Name of the model requested from the proxy.
        timeout: Seconds to wait for the proxy before giving up. Without a
            timeout ``requests`` waits indefinitely on a stalled server.

    Returns:
        The object obtained by JSON-decoding ``["message"]["content"]`` of the
        proxy's JSON reply.

    Raises:
        HTTPException: With the upstream status code when the proxy answers
            with a non-200 status, or with status 500 when the request fails
            or the reply cannot be decoded.
    """
    import logging  # function-scope import so the module import block stays untouched

    logger = logging.getLogger(__name__)
    ollama_endpoint = "api/chat"
    url = f"https://aioverlords-amnil-internal-ollama.hf.space/proxy/{ollama_endpoint}"

    try:
        # Data to send in the POST request
        data = {
            "data": {
                "model": model,
                "messages": message,
                "stream": False,
                "format": base_model.model_json_schema(),
            }
        }

        response = requests.post(url, json=data, timeout=timeout)

        if response.status_code != 200:
            logger.error("Request Error: %s %s", response.status_code, response.text)
            raise HTTPException(status_code=response.status_code, detail=response.text)

        body = response.json()
        # Debug level only: the reply contains the card holder's personal data.
        logger.debug("Request Success: %s", body)
        return json.loads(body["message"]["content"])
    except HTTPException:
        # Already carries the right status code; pass it through untouched.
        raise
    except Exception as e:
        # Network failures, timeouts and malformed replies all end up here.
        logger.exception("Model request to %s failed", url)
        raise HTTPException(status_code=500, detail=str(e)) from e
|
406 |
+
|
407 |
+
def perform_citizenship_ocr(image_path):
    """OCR a citizenship card image and parse the text into structured fields.

    Args:
        image_path: Path of the image file handed to ``ocr_citizenship``.

    Returns:
        The value returned by ``create_local_model`` for the ``Citizenship``
        schema.

    Raises:
        HTTPException: Propagated unchanged when the parsing step raises one;
            any other failure is reported with status 500.
    """
    try:
        unparsed_result = ocr_citizenship(image_path)
        message = [
            {"role": "system", "content": PARSE_PROMPT},
            {"role": "user", "content": f"Given Text: \n{unparsed_result}"},
        ]
        return create_local_model(message, Citizenship)
    except HTTPException:
        # HTTPException is itself an Exception; without this clause the handler
        # below would replace the upstream status code with 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
|