kavg committed on
Commit
80d398c
·
1 Parent(s): d0225fc

Removed multiple OCR API calls

Browse files
Files changed (2) hide show
  1. main.py +5 -11
  2. ocr.py +2 -0
main.py CHANGED
@@ -44,7 +44,8 @@ async def ProcessDocument(file: UploadFile):
44
  raise HTTPException(status_code=400, detail="Cannot apply OCR to the image")
45
  try:
46
  tokenClassificationOutput, img_size = LabelTokens(ocr_df, image)
47
- except:
 
48
  raise HTTPException(status_code=400, detail="Entity identification failed")
49
 
50
  try:
@@ -98,23 +99,16 @@ def ApplyOCR(content):
98
  # printed_ocr_df.to_csv('temp/complete_image_ocr.csv', index=False)
99
  # return printed_ocr_df, image
100
  except Exception as e:
 
101
  raise HTTPException(status_code=400, detail="Printed OCR process failed")
102
 
103
  try:
104
  trocr_client = ocr.TrOCRClient(config['settings'].TROCR_API_URL)
105
  handwritten_ocr_df = trocr_client.ocr(handwritten_imgs, image)
106
- except:
 
107
  raise HTTPException(status_code=400, detail="handwritten OCR process failed")
108
 
109
- try:
110
- jpeg_bytes = io.BytesIO()
111
- printed_img.save(jpeg_bytes, format='JPEG')
112
- jpeg_content = jpeg_bytes.getvalue()
113
- vision_client = ocr.VisionClient(config['settings'].GCV_AUTH)
114
- printed_ocr_df = vision_client.ocr(jpeg_content, printed_img)
115
- except:
116
- raise HTTPException(status_code=400, detail="Printed OCR process failed")
117
-
118
  ocr_df = pd.concat([handwritten_ocr_df, printed_ocr_df])
119
  return ocr_df, image
120
 
 
44
  raise HTTPException(status_code=400, detail="Cannot apply OCR to the image")
45
  try:
46
  tokenClassificationOutput, img_size = LabelTokens(ocr_df, image)
47
+ except Exception as e:
48
+ print(e)
49
  raise HTTPException(status_code=400, detail="Entity identification failed")
50
 
51
  try:
 
99
  # printed_ocr_df.to_csv('temp/complete_image_ocr.csv', index=False)
100
  # return printed_ocr_df, image
101
  except Exception as e:
102
+ print(e)
103
  raise HTTPException(status_code=400, detail="Printed OCR process failed")
104
 
105
  try:
106
  trocr_client = ocr.TrOCRClient(config['settings'].TROCR_API_URL)
107
  handwritten_ocr_df = trocr_client.ocr(handwritten_imgs, image)
108
+ except Exception as e:
109
+ print(e)
110
  raise HTTPException(status_code=400, detail="handwritten OCR process failed")
111
 
 
 
 
 
 
 
 
 
 
112
  ocr_df = pd.concat([handwritten_ocr_df, printed_ocr_df])
113
  return ocr_df, image
114
 
ocr.py CHANGED
@@ -12,6 +12,8 @@ image_ext = ("*.jpg", "*.jpeg", "*.png")
12
 
13
  class VisionClient:
14
  def __init__(self, auth):
 
 
15
  credentials = service_account.Credentials.from_service_account_info(
16
  auth
17
  )
 
12
 
13
  class VisionClient:
14
  def __init__(self, auth):
15
+ # with open('temp/client_secret.json') as f:
16
+ # auth = json.load(f)
17
  credentials = service_account.Credentials.from_service_account_info(
18
  auth
19
  )