Istvan-Adem committed on
Commit
22379c6
·
0 Parent(s):
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.index filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ env/
3
+ venv/
4
+ .venv/
5
+ .idea/
6
+ *.log
7
+ *.egg-info/
8
+ pip-wheel-metadata/
9
+ .env
10
+ .DS_Store
11
+ static/
Dockerfile ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.12.7
2
+
3
+ RUN useradd -m -u 1000 user
4
+ USER user
5
+ ENV PATH="/home/user/.local/bin:$PATH"
6
+
7
+ WORKDIR /app
8
+
9
+ COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
11
+
12
+ COPY --chown=user . /app
13
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Ocr Backend
3
+ emoji: 🏆
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: docker
7
+ pinned: false
8
+ ---
main.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ from ocr import create_app
2
+
3
+ app = create_app()
ocr/__init__.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from fastapi import FastAPI
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+ from starlette.exceptions import HTTPException as StarletteHTTPException
6
+ from starlette.staticfiles import StaticFiles
7
+
8
+ from ocr.core.config import settings
9
+ from ocr.core.wrappers import OcrResponseWrapper, ErrorOcrResponse
10
+
11
+
12
def create_app() -> FastAPI:
    """Build and configure the FastAPI application.

    The factory registers the OCR router, enables permissive CORS, serves the
    project's ``static`` directory under ``/static``, installs an HTTP error
    handler that answers in the ``OcrResponseWrapper`` envelope, and exposes a
    trivial root endpoint.

    Returns:
        The configured ``FastAPI`` instance.
    """
    app = FastAPI()

    # Imported lazily, at app-construction time.
    from ocr.api.message import ocr_router
    app.include_router(ocr_router, tags=['message'])

    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_methods=["*"],
        allow_headers=["*"],
    )

    static_directory = os.path.join(settings.BASE_DIR, 'static')
    # exist_ok avoids the check-then-create race when several workers start.
    os.makedirs(static_directory, exist_ok=True)

    # Mount the directory that was just ensured to exist, not a path relative
    # to the current working directory, so the app also starts when it is
    # launched from outside the project root.
    app.mount(
        '/static',
        StaticFiles(directory=static_directory),
    )

    @app.exception_handler(StarletteHTTPException)
    async def http_exception_handler(_, exc):
        # Render HTTP errors in the same envelope as successful responses.
        return OcrResponseWrapper(
            data=None,
            successful=False,
            error=ErrorOcrResponse(message=str(exc.detail))
        ).response(exc.status_code)

    @app.get("/")
    async def read_root():
        return {"message": "Hello world!"}

    return app
ocr/api/__init__.py ADDED
File without changes
ocr/api/message/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fastapi.routing import APIRouter
2
+
3
# Router grouping the OCR endpoints under the /api/ocr prefix.
ocr_router = APIRouter(prefix="/api/ocr", tags=["message"])

# Imported after ocr_router is defined: the views module imports ocr_router
# from this package in order to attach its routes.
from . import views
ocr/api/message/dto.py ADDED
File without changes
ocr/api/message/openai_request.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import io
3
+
4
+ from starlette.datastructures import UploadFile
5
+
6
+ from ocr.api.message.prompts import OCRPrompts
7
+ from ocr.api.message.utils import clean_assistant_response
8
+ from ocr.core.config import settings
9
+
10
+
11
async def analyze_uploaded_document(file: UploadFile):
    """Run the configured OpenAI assistant over an uploaded document.

    The upload is read into memory and sent to OpenAI as an ``assistants``
    file while a new thread is created concurrently. A user message with the
    file attached (``file_search`` tool) is then posted, a run is created and
    polled with the OCR prompt as instructions, and the cleaned assistant
    text for that run is returned.

    Args:
        file: The uploaded document.

    Returns:
        Whatever ``clean_assistant_response`` returns for the finished run.
    """
    client = settings.OPENAI_CLIENT

    payload = io.BytesIO(await file.read())
    # Carry the upload's filename on the in-memory buffer.
    # NOTE(review): presumably the OpenAI client uses it to name/type the file - confirm.
    payload.name = file.filename

    # Thread creation and file upload are independent, so run them together.
    thread, uploaded = await asyncio.gather(
        client.beta.threads.create(),
        client.files.create(purpose='assistants', file=payload),
    )

    attachment = {"file_id": uploaded.id, "tools": [{"type": "file_search"}]}
    await client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content='Generate a report on the attached document',
        attachments=[attachment],
    )

    run = await client.beta.threads.runs.create_and_poll(
        assistant_id=settings.ASSISTANT_ID,
        thread_id=thread.id,
        instructions=OCRPrompts.generate_general_answer,
    )
    return await clean_assistant_response(thread.id, run.id)
ocr/api/message/prompts.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
class OCRPrompts:
    """Namespace for the prompt texts sent to the assistant."""

    # Run instructions used when generating the report for an uploaded document.
    generate_general_answer = """## Task

You must analyze the attached medical document and generate a comprehensive report in **Markdown2** format. Ensure that every detail provided in the document is included, and do not omit or modify any information. Your output must strictly follow the required format.

## Report Structure

The report should be structured as follows, with each section containing only relevant information from the document:

1. **Diagnosis and Staging Details**
Include all diagnosis-related and staging information.

2. **Tumor Markers and Pathology Findings**
Provide detailed tumor markers and any pathology results mentioned.

3. **Imaging Results** (e.g., CT, MRI summaries)
Summarize all relevant imaging results provided in the document.

4. **Prior Treatments and Outcomes**
Detail any prior treatments and their outcomes as found in the document.

## Instructions

- Your response must be in **Markdown2** format.
- Do not use bullet points very often.
- **Do not invent or infer any information.** Only use data provided in the document.
- If any section listed in the report structure lacks corresponding information, **omit the section entirely**. Do not leave blank sections.
- Ensure that the format is followed strictly, and the output is complete without any deviations."""
ocr/api/message/schemas.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel
2
+
3
+
4
class OcrResponse(BaseModel):
    """Payload of a successful OCR request."""

    # Text generated for the uploaded document.
    text: str
6
+
ocr/api/message/utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from ocr.core.config import settings
2
+
3
+
4
async def clean_assistant_response(thread_id: str, run_id: str) -> str:
    """Return the text produced by a run with annotation markers removed.

    Iterates the messages of ``thread_id`` that belong to ``run_id``, takes
    the first content block of each message and strips the text of every
    annotation from it. If the listing yields several messages, the text of
    the last one yielded is returned.

    Args:
        thread_id: Identifier of the OpenAI thread.
        run_id: Identifier of the run whose messages are read.

    Returns:
        The cleaned message text, or an empty string when the run produced no
        text content.
    """
    result = ''
    async for message in settings.OPENAI_CLIENT.beta.threads.messages.list(thread_id=thread_id, run_id=run_id):
        # Skip messages without content or whose first block is not text
        # instead of failing with IndexError/AttributeError.
        if not message.content:
            continue
        text_block = getattr(message.content[0], 'text', None)
        if text_block is None:
            continue

        # Work on a local copy rather than mutating the SDK object in place.
        cleaned = text_block.value
        for annotation in text_block.annotations:
            cleaned = cleaned.replace(annotation.text, '')
        result = cleaned
    return result
ocr/api/message/views.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import File, UploadFile
2
+
3
+ from ocr.api.message import ocr_router
4
+ from ocr.api.message.openai_request import analyze_uploaded_document
5
+ from ocr.api.message.schemas import OcrResponse
6
+ from ocr.core.wrappers import OcrResponseWrapper
7
+
8
+
9
@ocr_router.post('/parse')
async def get_all_chat_messages(
        file: UploadFile = File(...)
) -> OcrResponseWrapper[OcrResponse]:
    """Analyze an uploaded document and return the generated report.

    Args:
        file: The document uploaded as multipart form data.

    Returns:
        An ``OcrResponseWrapper`` whose ``data.text`` holds the report.
    """
    # The report is returned to the caller only; nothing is written to disk,
    # so project files such as README.md are never overwritten by a request.
    report = await analyze_uploaded_document(file)
    return OcrResponseWrapper(data=OcrResponse(text=report))
ocr/core/config.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pathlib
3
+ from functools import lru_cache
4
+
5
+ from dotenv import load_dotenv
6
+ from openai import AsyncClient
7
+
8
+ load_dotenv()
9
+
10
class BaseConfig:
    """Settings shared by every environment, read from environment variables."""

    # Third ancestor directory of this file.
    BASE_DIR: pathlib.Path = pathlib.Path(__file__).parents[2]
    SECRET_KEY = os.environ.get('SECRET')
    # One shared async OpenAI client, built when the class body is executed.
    OPENAI_CLIENT = AsyncClient(api_key=os.environ.get('OPENAI_API_KEY'))
    ASSISTANT_ID = os.environ.get('ASSISTANT_ID')
15
+
16
class DevelopmentConfig(BaseConfig):
    """Settings used when FASTAPI_CONFIG selects 'development'."""

    Issuer = 'http://localhost:8000'
    Audience = 'http://localhost:3000'
19
+
20
+
21
class ProductionConfig(BaseConfig):
    """Settings used when FASTAPI_CONFIG selects 'production'."""

    # Both values are empty strings in this configuration.
    Issuer = ''
    Audience = ''
24
+
25
+
26
@lru_cache()
def get_settings() -> DevelopmentConfig | ProductionConfig:
    """Instantiate the configuration selected by ``FASTAPI_CONFIG``.

    The environment variable defaults to ``'development'``. The result is
    cached, so repeated calls return the same instance.

    Raises:
        KeyError: If ``FASTAPI_CONFIG`` names an unknown configuration.
    """
    selected = os.getenv('FASTAPI_CONFIG', default='development')
    available = {
        'development': DevelopmentConfig,
        'production': ProductionConfig,
    }
    return available[selected]()
35
+
36
+
37
+ settings = get_settings()
ocr/core/wrappers.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ from functools import wraps
3
+ from typing import Generic, Optional, TypeVar
4
+
5
+ import pydash
6
+ from fastapi import HTTPException
7
+ from pydantic import BaseModel
8
+ from starlette.responses import JSONResponse
9
+
10
+ from ocr.core.config import settings
11
+
12
+ T = TypeVar('T')
13
+
14
+
15
class ErrorOcrResponse(BaseModel):
    """Error details carried inside an ``OcrResponseWrapper``."""

    # Human-readable description of what went wrong.
    message: str
17
+
18
+
19
class OcrResponseWrapper(BaseModel, Generic[T]):
    """Uniform response envelope: payload, success flag and optional error."""

    # Payload of a successful call; None on failure.
    data: Optional[T] = None
    # False when the request failed.
    successful: bool = True
    # Error details; None on success.
    error: Optional[ErrorOcrResponse] = None

    def response(self, status_code: int) -> JSONResponse:
        """Render the envelope as a ``JSONResponse`` with the given status.

        Args:
            status_code: HTTP status code of the response.

        Returns:
            A ``JSONResponse`` with the keys ``data``, ``successful`` and
            ``error``.
        """
        # Function-scope import so the module's import block stays untouched;
        # fastapi is already a dependency of this module.
        from fastapi.encoders import jsonable_encoder

        # jsonable_encoder converts nested pydantic models (``data`` and
        # ``error``) into plain JSON-compatible values; passing a model
        # instance straight to JSONResponse would fail during serialization.
        return JSONResponse(
            status_code=status_code,
            content=jsonable_encoder({
                "data": self.data,
                "successful": self.successful,
                "error": self.error,
            }),
        )
33
+
34
+
35
def exception_wrapper(http_error: int, error_message: str):
    """Build a decorator that turns unexpected errors into an ``HTTPException``.

    Args:
        http_error: Status code of the ``HTTPException`` raised on failure.
        error_message: Detail message of that exception.

    Returns:
        A decorator for coroutine functions. The decorated coroutine returns
        the original result. An ``HTTPException`` raised by the wrapped
        coroutine propagates unchanged; any other ``Exception`` is re-raised
        as ``HTTPException(http_error, error_message)`` chained to the
        original error.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except HTTPException:
                # Keep the status and detail chosen deliberately by the wrapped code.
                raise
            except Exception as e:
                raise HTTPException(status_code=http_error, detail=error_message) from e

        return wrapper

    return decorator
47
+
48
+
49
def openai_wrapper(
        temperature: int | float = 0, model: str = "gpt-4o-mini", is_json: bool = False,
        return_: Optional[str] = None
):
    """Build a decorator that sends a coroutine's messages to chat completions.

    The decorated coroutine must return the ``messages`` list for the request.
    The wrapper sends it to the chat completions endpoint and returns the
    content of the first choice.

    Args:
        temperature: Sampling temperature passed to the API.
        model: Model name passed to the API.
        is_json: Request a JSON object response and parse it with ``json.loads``.
        return_: Optional pydash path; when truthy, the value found at this
            path in the response is returned instead of the whole response.

    Returns:
        A decorator for coroutine functions. The decorated coroutine returns
        the raw text, the parsed JSON, or the value looked up via ``return_``.
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs) -> object:
            messages = await func(*args, **kwargs)
            completion = await settings.OPENAI_CLIENT.chat.completions.create(
                messages=messages,
                temperature=temperature,
                n=1,
                model=model,
                response_format={"type": "json_object"} if is_json else {"type": "text"}
            )
            response = completion.choices[0].message.content
            if is_json:
                response = json.loads(response)
            # The lookup is applied whenever a path is given, independent of is_json.
            if return_:
                return pydash.get(response, return_)
            return response

        return wrapper

    return decorator
73
+
74
+
75
def background_task():
    """Build a decorator for best-effort coroutines.

    The decorated coroutine returns the wrapped coroutine's result. If the
    wrapped coroutine raises an ``Exception``, the error is logged with its
    traceback and ``None`` is returned, so a failing task never propagates
    its error to the caller.

    Returns:
        A decorator for coroutine functions.
    """
    # Function-scope import so the module's import block stays untouched.
    import logging

    logger = logging.getLogger(__name__)

    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            try:
                return await func(*args, **kwargs)
            except Exception:
                # Still best-effort, but no longer silent: record the failure.
                logger.exception(
                    "Background task %s failed", getattr(func, '__name__', repr(func))
                )
                return None

        return wrapper

    return decorator