Spaces:

brestok
/

ocr-backend

Sleeping

Istvan-Adem committed on Feb 7

Commit

67deb87

1 Parent(s): 70de892

add pytesseract

Files changed (6)

ocr/__init__.py CHANGED Viewed

@@ -1,9 +1,6 @@
-import os
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.exceptions import HTTPException as StarletteHTTPException
-from starlette.staticfiles import StaticFiles
 from ocr.core.config import settings
 from ocr.core.wrappers import OcrResponseWrapper, ErrorOcrResponse
@@ -22,15 +19,6 @@ def create_app() -> FastAPI:
         allow_headers=["*"],
     )
-    static_directory = os.path.join(settings.BASE_DIR, 'static')
-    if not os.path.exists(static_directory):
-        os.makedirs(static_directory)
-    app.mount(
-        '/static',
-        StaticFiles(directory='static'),
-    )
     @app.exception_handler(StarletteHTTPException)
     async def http_exception_handler(_, exc):
         return OcrResponseWrapper(

 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from starlette.exceptions import HTTPException as StarletteHTTPException
 from ocr.core.config import settings
 from ocr.core.wrappers import OcrResponseWrapper, ErrorOcrResponse
         allow_headers=["*"],
     )
     @app.exception_handler(StarletteHTTPException)
     async def http_exception_handler(_, exc):
         return OcrResponseWrapper(

ocr/api/message/openai_request.py CHANGED Viewed

@@ -3,7 +3,7 @@ from ocr.core.wrappers import openai_wrapper
 @openai_wrapper(model='gpt-4o-mini')
-async def generate_report(request_content: list[dict]):
     messages = [
         {
             "role": "system",
@@ -11,7 +11,7 @@ async def generate_report(request_content: list[dict]):
         },
         {
             "role": "user",
-            "content": request_content
         }
     ]
     return messages

 @openai_wrapper(model='gpt-4o-mini')
+async def generate_report(text: str):
     messages = [
         {
             "role": "system",
         },
         {
             "role": "user",
+            "content": f"Generate a report based on this data:\n\n```\n{text}\n```"
         }
     ]
     return messages

ocr/api/message/prompts.py CHANGED Viewed

@@ -1,7 +1,7 @@
 class OCRPrompts:
     generate_general_answer = """## Task
-You must analyze the attached medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.
 ## Report Structure
@@ -38,7 +38,7 @@ The report should be structured as follows, with each section containing only re
 ## Instructions
-- **Do not invent or infer any information.** Only use data provided in the document.
 - Ensure that the format is followed strictly, and the output is complete without any deviations.
 [/INST]"""

 class OCRPrompts:
     generate_general_answer = """## Task
+You must analyze the text extracted from medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.
 ## Report Structure
 ## Instructions
+- **Do not invent or infer any information.** Only use data provided in the user request.
 - Ensure that the format is followed strictly, and the output is complete without any deviations.
 [/INST]"""

ocr/api/message/utils.py CHANGED Viewed

@@ -1,7 +1,8 @@
-import base64
 import io
 import re
 from pdf2image import convert_from_bytes
@@ -16,21 +17,15 @@ def divide_images(contents: bytes) -> list[bytes]:
     return image_bytes_list
-def prepare_request_content(images: list[bytes]):
-    content = [
-        {"type": "text", "text": "Generate a report on the attached document"},
-        *[
-            {
-                "type": "image_url",
-                "image_url": {
-                    "url": f"data:image/jpeg;base64,{base64.b64encode(image).decode('utf-8')}",
-                },
-            }
-            for image in images
-        ]
-    ]
-    return content
 def clean_response(text: str) -> str:
     try:

 import io
 import re
+import pytesseract
+from PIL import Image
 from pdf2image import convert_from_bytes
     return image_bytes_list
+def extract_text_from_images(images: list[bytes]) -> str:
+    extracted_texts = []
+    for image_bytes in images:
+        image = Image.open(io.BytesIO(image_bytes))
+        text = pytesseract.image_to_string(image)
+        extracted_texts.append(text)
+    return '\n'.join(extracted_texts)
 def clean_response(text: str) -> str:
     try:

ocr/api/message/views.py CHANGED Viewed

@@ -3,7 +3,7 @@ from fastapi import File, UploadFile
 from ocr.api.message import ocr_router
 from ocr.api.message.openai_request import generate_report
 from ocr.api.message.schemas import OcrResponse
-from ocr.api.message.utils import divide_images, prepare_request_content, clean_response
 from ocr.core.wrappers import OcrResponseWrapper
@@ -14,8 +14,8 @@ async def get_all_chat_messages(
     try:
         contents = await file.read()
         images = divide_images(contents)
-        request_content = prepare_request_content(images)
-        response = await generate_report(request_content)
         return OcrResponseWrapper(data=OcrResponse(text=clean_response(response)))
     finally:
         await file.close()

 from ocr.api.message import ocr_router
 from ocr.api.message.openai_request import generate_report
 from ocr.api.message.schemas import OcrResponse
+from ocr.api.message.utils import divide_images, clean_response, extract_text_from_images
 from ocr.core.wrappers import OcrResponseWrapper
     try:
         contents = await file.read()
         images = divide_images(contents)
+        text_content = extract_text_from_images(images)
+        response = await generate_report(text_content)
         return OcrResponseWrapper(data=OcrResponse(text=clean_response(response)))
     finally:
         await file.close()

requirements.txt CHANGED Viewed

@@ -11,11 +11,13 @@ httpx==0.28.1
 idna==3.10
 jiter==0.8.2
 openai==1.59.9
 pdf2image==1.17.0
 pillow==11.1.0
 pydantic==2.10.5
 pydantic_core==2.27.2
 pydash==8.0.5
 python-dotenv==1.0.1
 python-multipart==0.0.20
 PyYAML==6.0.2

 idna==3.10
 jiter==0.8.2
 openai==1.59.9
+packaging==24.2
 pdf2image==1.17.0
 pillow==11.1.0
 pydantic==2.10.5
 pydantic_core==2.27.2
 pydash==8.0.5
+pytesseract==0.3.13
 python-dotenv==1.0.1
 python-multipart==0.0.20
 PyYAML==6.0.2