Removed multiple OCR API calls
Browse files
main.py
CHANGED
@@ -44,7 +44,8 @@ async def ProcessDocument(file: UploadFile):
|
|
44 |
raise HTTPException(status_code=400, detail="Cannot apply OCR to the image")
|
45 |
try:
|
46 |
tokenClassificationOutput, img_size = LabelTokens(ocr_df, image)
|
47 |
-
except:
|
|
|
48 |
raise HTTPException(status_code=400, detail="Entity identification failed")
|
49 |
|
50 |
try:
|
@@ -98,23 +99,16 @@ def ApplyOCR(content):
|
|
98 |
# printed_ocr_df.to_csv('temp/complete_image_ocr.csv', index=False)
|
99 |
# return printed_ocr_df, image
|
100 |
except Exception as e:
|
|
|
101 |
raise HTTPException(status_code=400, detail="Printed OCR process failed")
|
102 |
|
103 |
try:
|
104 |
trocr_client = ocr.TrOCRClient(config['settings'].TROCR_API_URL)
|
105 |
handwritten_ocr_df = trocr_client.ocr(handwritten_imgs, image)
|
106 |
-
except:
|
|
|
107 |
raise HTTPException(status_code=400, detail="handwritten OCR process failed")
|
108 |
|
109 |
-
try:
|
110 |
-
jpeg_bytes = io.BytesIO()
|
111 |
-
printed_img.save(jpeg_bytes, format='JPEG')
|
112 |
-
jpeg_content = jpeg_bytes.getvalue()
|
113 |
-
vision_client = ocr.VisionClient(config['settings'].GCV_AUTH)
|
114 |
-
printed_ocr_df = vision_client.ocr(jpeg_content, printed_img)
|
115 |
-
except:
|
116 |
-
raise HTTPException(status_code=400, detail="Printed OCR process failed")
|
117 |
-
|
118 |
ocr_df = pd.concat([handwritten_ocr_df, printed_ocr_df])
|
119 |
return ocr_df, image
|
120 |
|
|
|
44 |
raise HTTPException(status_code=400, detail="Cannot apply OCR to the image")
|
45 |
try:
|
46 |
tokenClassificationOutput, img_size = LabelTokens(ocr_df, image)
|
47 |
+
except Exception as e:
|
48 |
+
print(e)
|
49 |
raise HTTPException(status_code=400, detail="Entity identification failed")
|
50 |
|
51 |
try:
|
|
|
99 |
# printed_ocr_df.to_csv('temp/complete_image_ocr.csv', index=False)
|
100 |
# return printed_ocr_df, image
|
101 |
except Exception as e:
|
102 |
+
print(e)
|
103 |
raise HTTPException(status_code=400, detail="Printed OCR process failed")
|
104 |
|
105 |
try:
|
106 |
trocr_client = ocr.TrOCRClient(config['settings'].TROCR_API_URL)
|
107 |
handwritten_ocr_df = trocr_client.ocr(handwritten_imgs, image)
|
108 |
+
except Exception as e:
|
109 |
+
print(e)
|
110 |
raise HTTPException(status_code=400, detail="handwritten OCR process failed")
|
111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
112 |
ocr_df = pd.concat([handwritten_ocr_df, printed_ocr_df])
|
113 |
return ocr_df, image
|
114 |
|
ocr.py
CHANGED
@@ -12,6 +12,8 @@ image_ext = ("*.jpg", "*.jpeg", "*.png")
|
|
12 |
|
13 |
class VisionClient:
|
14 |
def __init__(self, auth):
|
|
|
|
|
15 |
credentials = service_account.Credentials.from_service_account_info(
|
16 |
auth
|
17 |
)
|
|
|
12 |
|
13 |
class VisionClient:
|
14 |
def __init__(self, auth):
|
15 |
+
# with open('temp/client_secret.json') as f:
|
16 |
+
# auth = json.load(f)
|
17 |
credentials = service_account.Credentials.from_service_account_info(
|
18 |
auth
|
19 |
)
|