Spaces:
Running
Running
Istvan-Adem
committed on
Commit
·
22379c6
0
Parent(s):
init
Browse files- .gitattributes +1 -0
- .gitignore +11 -0
- Dockerfile +13 -0
- README.md +8 -0
- main.py +3 -0
- ocr/__init__.py +46 -0
- ocr/api/__init__.py +0 -0
- ocr/api/message/__init__.py +7 -0
- ocr/api/message/dto.py +0 -0
- ocr/api/message/openai_request.py +28 -0
- ocr/api/message/prompts.py +28 -0
- ocr/api/message/schemas.py +6 -0
- ocr/api/message/utils.py +12 -0
- ocr/api/message/views.py +16 -0
- ocr/core/config.py +37 -0
- ocr/core/wrappers.py +87 -0
.gitattributes
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.index filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
__pycache__/
|
2 |
+
env/
|
3 |
+
venv/
|
4 |
+
.venv/
|
5 |
+
.idea/
|
6 |
+
*.log
|
7 |
+
*.egg-info/
|
8 |
+
pip-wheel-metadata/
|
9 |
+
.env
|
10 |
+
.DS_Store
|
11 |
+
static/
|
Dockerfile
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Base image pinned to the version the app is developed against.
FROM python:3.12.7

# Run as a non-root user with uid 1000 (required by Hugging Face Spaces).
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Copy and install requirements first so the dependency layer is cached
# and only rebuilt when requirements.txt changes, not on every code edit.
COPY --chown=user ./requirements.txt requirements.txt
RUN pip install --no-cache-dir --upgrade -r requirements.txt

COPY --chown=user . /app
# Port 7860 is the port Hugging Face Spaces expects the app to listen on.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
README.md
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Ocr Backend
|
3 |
+
emoji: 🏆
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: yellow
|
6 |
+
sdk: docker
|
7 |
+
pinned: false
|
8 |
+
---
|
main.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
from ocr import create_app
|
2 |
+
|
3 |
+
app = create_app()
|
ocr/__init__.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from starlette.exceptions import HTTPException as StarletteHTTPException
from starlette.staticfiles import StaticFiles

from ocr.core.config import settings
from ocr.core.wrappers import OcrResponseWrapper, ErrorOcrResponse


def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    Registers the OCR message router, permissive CORS, a /static mount,
    and a JSON error envelope for every HTTP exception.
    """
    app = FastAPI()

    # Imported here (not at module top) to avoid a circular import:
    # ocr.api.message itself imports from the ocr package.
    from ocr.api.message import ocr_router
    app.include_router(ocr_router, tags=['message'])

    # NOTE(review): wide-open CORS is fine for a demo Space; tighten the
    # origin list before any production deployment.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # StaticFiles raises at startup if the directory does not exist, so
    # create it first. exist_ok avoids a race with concurrent workers.
    static_directory = os.path.join(settings.BASE_DIR, 'static')
    os.makedirs(static_directory, exist_ok=True)

    # Fix: mount the same absolute path that was just created. The original
    # passed the relative 'static', which silently depends on the process
    # working directory being the project root.
    app.mount(
        '/static',
        StaticFiles(directory=static_directory),
    )

    @app.exception_handler(StarletteHTTPException)
    async def http_exception_handler(_, exc):
        """Wrap every HTTPException in the standard OcrResponseWrapper envelope."""
        return OcrResponseWrapper(
            data=None,
            successful=False,
            error=ErrorOcrResponse(message=str(exc.detail))
        ).response(exc.status_code)

    @app.get("/")
    async def read_root():
        """Health-check / landing endpoint."""
        return {"message": "Hello world!"}

    return app
|
ocr/api/__init__.py
ADDED
File without changes
|
ocr/api/message/__init__.py
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi.routing import APIRouter

# Router for the OCR message endpoints; all routes live under /api/ocr.
ocr_router = APIRouter(
    prefix="/api/ocr", tags=["message"]
)

# Imported at the bottom so the view module can import ocr_router from this
# package without creating a circular import; the import registers the routes.
from . import views
|
ocr/api/message/dto.py
ADDED
File without changes
|
ocr/api/message/openai_request.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
import io

from starlette.datastructures import UploadFile

from ocr.api.message.prompts import OCRPrompts
from ocr.api.message.utils import clean_assistant_response
from ocr.core.config import settings


async def analyze_uploaded_document(file: UploadFile) -> str:
    """Run the uploaded document through the OpenAI assistant and return
    the cleaned report text.

    The document is uploaded to OpenAI and attached to a fresh thread; a
    run is started with the general OCR prompt and polled to completion.
    """
    contents = await file.read()
    openai_file = io.BytesIO(contents)
    # The OpenAI SDK infers the file type from the buffer's name attribute.
    openai_file.name = file.filename
    # Create the thread and upload the file concurrently.
    thread, openai_file = await asyncio.gather(
        settings.OPENAI_CLIENT.beta.threads.create(),
        settings.OPENAI_CLIENT.files.create(purpose='assistants', file=openai_file)
    )
    try:
        await settings.OPENAI_CLIENT.beta.threads.messages.create(
            attachments=[{"file_id": openai_file.id, "tools": [{"type": "file_search"}]}],
            thread_id=thread.id,
            role="user",
            content='Generate a report on the attached document'
        )
        run = await settings.OPENAI_CLIENT.beta.threads.runs.create_and_poll(
            assistant_id=settings.ASSISTANT_ID, thread_id=thread.id, instructions=OCRPrompts.generate_general_answer
        )
        return await clean_assistant_response(thread.id, run.id)
    finally:
        # Fix: the uploaded file was never removed, leaking storage on the
        # OpenAI account with every request. Delete it once the run is done.
        await settings.OPENAI_CLIENT.files.delete(openai_file.id)
|
ocr/api/message/prompts.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
class OCRPrompts:
    """Prompt templates used as run instructions for the OCR assistant."""

    # NOTE(review): the prompt asks for "**Markdown2**" format — possibly a
    # typo for "Markdown" (or Telegram's "MarkdownV2"). Confirm against the
    # assistant configuration before changing; the text is sent verbatim.
    generate_general_answer = """## Task

You must analyze the attached medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.

## Report Structure

The report should be structured as follows, with each section containing only relevant information from the document:

1. **Diagnosis and Staging Details**
Include all diagnosis-related and staging information.

2. **Tumor Markers and Pathology Findings**
Provide detailed tumor markers and any pathology results mentioned.

3. **Imaging Results** (e.g., CT, MRI summaries)
Summarize all relevant imaging results provided in the document.

4. **Prior Treatments and Outcomes**
Detail any prior treatments and their outcomes as found in the document.

## Instructions

- Your response must be in **Markdown2** format.
- Do not use bullet points very often.
- **Do not invent or infer any information.** Only use data provided in the document.
- If any section listed in the report structure lacks corresponding information, **omit the section entirely**. Do not leave blank sections.
- Ensure that the format is followed strictly, and the output is complete without any deviations."""
|
ocr/api/message/schemas.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pydantic import BaseModel


class OcrResponse(BaseModel):
    """Payload returned by the /parse endpoint."""
    # Markdown report text generated by the assistant.
    text: str
|
6 |
+
|
ocr/api/message/utils.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from ocr.core.config import settings


async def clean_assistant_response(thread_id: str, run_id: str) -> str:
    """Return the assistant's reply text with citation annotations removed.

    Iterates the messages produced by the given run. If the run produced
    several messages, the last one iterated wins (a run here normally
    yields a single assistant message). Returns '' if there are none.
    """
    result = ''
    async for message in settings.OPENAI_CLIENT.beta.threads.messages.list(thread_id=thread_id, run_id=run_id):
        message_content = message.content[0].text
        annotations = message_content.annotations
        # Strip file-search citation markers (e.g. "【4:0†source】") in place.
        for annotation in annotations:
            # Fix: plain '' instead of the pointless f-string literal f"".
            message_content.value = message_content.value.replace(annotation.text, "")
        result = message_content.value
    return result
|
ocr/api/message/views.py
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fastapi import File, UploadFile

from ocr.api.message import ocr_router
from ocr.api.message.openai_request import analyze_uploaded_document
from ocr.api.message.schemas import OcrResponse
from ocr.core.wrappers import OcrResponseWrapper


@ocr_router.post('/parse')
async def get_all_chat_messages(
        file: UploadFile = File(...)
) -> OcrResponseWrapper[OcrResponse]:
    """Parse an uploaded document with the OpenAI assistant and return the
    generated report wrapped in the standard response envelope.

    Fix: removed the debugging leftover that rewrote README.md with every
    response — it also shadowed the `file` parameter with the open handle.
    """
    response = await analyze_uploaded_document(file)
    return OcrResponseWrapper(data=OcrResponse(text=response))
|
ocr/core/config.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import pathlib
from functools import lru_cache

from dotenv import load_dotenv
from openai import AsyncClient

# Pull environment variables from a local .env file before reading them.
load_dotenv()


class BaseConfig:
    """Settings shared by every environment."""

    # Project root: three levels up from ocr/core/config.py.
    BASE_DIR: pathlib.Path = pathlib.Path(__file__).parent.parent.parent
    SECRET_KEY = os.getenv('SECRET')
    # Single shared async OpenAI client, created once at import time.
    OPENAI_CLIENT = AsyncClient(api_key=os.getenv('OPENAI_API_KEY'))
    ASSISTANT_ID = os.getenv('ASSISTANT_ID')


class DevelopmentConfig(BaseConfig):
    """Local development overrides."""

    Issuer = "http://localhost:8000"
    Audience = "http://localhost:3000"


class ProductionConfig(BaseConfig):
    """Production overrides (issuer/audience to be filled in)."""

    Issuer = ""
    Audience = ""


@lru_cache()
def get_settings() -> DevelopmentConfig | ProductionConfig:
    """Instantiate the config class selected by FASTAPI_CONFIG (cached)."""
    registry = {
        'development': DevelopmentConfig,
        'production': ProductionConfig,
    }
    chosen = os.getenv('FASTAPI_CONFIG', default='development')
    return registry[chosen]()


# Module-level singleton used throughout the app.
settings = get_settings()
|
ocr/core/wrappers.py
ADDED
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
from functools import wraps
from typing import Generic, Optional, TypeVar

import pydash
from fastapi import HTTPException
from pydantic import BaseModel
from starlette.responses import JSONResponse

from ocr.core.config import settings

# Payload type carried by OcrResponseWrapper.
T = TypeVar('T')


class ErrorOcrResponse(BaseModel):
    """Error detail embedded in a failed OcrResponseWrapper."""
    # Human-readable description of the failure.
    message: str
17 |
+
|
18 |
+
|
19 |
+
class OcrResponseWrapper(BaseModel, Generic[T]):
    """Uniform envelope for every OCR API response.

    Attributes:
        data: payload on success, None on failure.
        successful: False when an error occurred.
        error: error details when `successful` is False.
    """
    data: Optional[T] = None
    successful: bool = True
    error: Optional[ErrorOcrResponse] = None

    def response(self, status_code: int) -> JSONResponse:
        """Render this wrapper as a JSONResponse with the given status code.

        Fix: `data` is serialized explicitly — JSONResponse json-encodes its
        `content`, which crashes on a raw pydantic model; the original only
        worked because callers always passed data=None.
        """
        return JSONResponse(
            status_code=status_code,
            content={
                "data": self.data.dict() if isinstance(self.data, BaseModel) else self.data,
                "successful": self.successful,
                "error": self.error.dict() if self.error else None
            }
        )
|
33 |
+
|
34 |
+
|
35 |
+
def exception_wrapper(http_error: int, error_message: str):
    """Decorator factory: convert unexpected exceptions raised by the
    wrapped coroutine into an HTTPException with the given status/detail.

    Fix: HTTPExceptions raised deliberately inside the handler are
    re-raised unchanged — the original broad `except Exception` replaced
    their intended status code and detail with the generic error.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except HTTPException:
                raise
            except Exception as e:
                # Chain the original cause for debuggability.
                raise HTTPException(status_code=http_error, detail=error_message) from e

        return wrapper

    return decorator
|
47 |
+
|
48 |
+
|
49 |
+
def openai_wrapper(
        temperature: int | float = 0, model: str = "gpt-4o-mini", is_json: bool = False, return_: str = None
):
    """Decorator factory: the wrapped coroutine builds a chat message list,
    which is then sent to the OpenAI chat-completions API.

    Args:
        temperature: sampling temperature forwarded to the API.
        model: chat model name.
        is_json: request a JSON-object response and parse it.
        return_: optional pydash path extracted from the parsed JSON.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs) -> str:
            messages = await func(*args, **kwargs)
            wanted_format = {"type": "json_object"} if is_json else {"type": "text"}
            completion = await settings.OPENAI_CLIENT.chat.completions.create(
                messages=messages,
                temperature=temperature,
                n=1,
                model=model,
                response_format=wanted_format
            )
            answer = completion.choices[0].message.content
            if not is_json:
                return answer
            parsed = json.loads(answer)
            # A pydash path narrows the parsed object down to one field.
            return pydash.get(parsed, return_) if return_ else parsed

        return wrapper

    return decorator
73 |
+
|
74 |
+
|
75 |
+
def background_task():
    """Decorator factory for fire-and-forget coroutines.

    Any exception raised by the wrapped coroutine is caught so a failing
    background job cannot crash its caller; the wrapper returns None in
    that case, otherwise the coroutine's result.

    Fix: the original `except Exception as e: pass` swallowed failures
    silently — they are now logged with a full traceback.
    """
    import logging  # local import keeps the decorator self-contained
    logger = logging.getLogger(__name__)

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception:
                logger.exception("background task %s failed", func.__name__)

        return wrapper

    return decorator
|