Spaces:
Sleeping
Sleeping
sachin
committed on
Commit
·
5cd593d
1
Parent(s):
93119e7
update
Browse files- .dockerignore +85 -0
- Dockerfile +54 -0
- ux/README.md +2 -0
- ux/discovery_demo.py +148 -0
.dockerignore
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
samples
|
2 |
+
*.wav
|
3 |
+
*.log
|
4 |
+
venv
|
5 |
+
*.nemo
|
6 |
+
|
7 |
+
docs
|
8 |
+
misc
|
9 |
+
samples
|
10 |
+
# Ignore all Python files except those explicitly copied
|
11 |
+
*.pyc
|
12 |
+
*.pyo
|
13 |
+
*.pyd
|
14 |
+
|
15 |
+
# Ignore all virtual environments
|
16 |
+
venv/
|
17 |
+
env/
|
18 |
+
.env/
|
19 |
+
.venv/
|
20 |
+
__pycache__/
|
21 |
+
|
22 |
+
# Ignore build artifacts
|
23 |
+
build/
|
24 |
+
dist/
|
25 |
+
*.egg-info/
|
26 |
+
|
27 |
+
# Ignore local version control files
|
28 |
+
.git/
|
29 |
+
.gitignore
|
30 |
+
|
31 |
+
# Ignore local environment files
|
32 |
+
.env
|
33 |
+
|
34 |
+
# Ignore local log files
|
35 |
+
*.log
|
36 |
+
|
37 |
+
# Ignore all node_modules
|
38 |
+
node_modules/
|
39 |
+
|
40 |
+
# Ignore all Docker-related files
|
41 |
+
Dockerfile
|
42 |
+
docker-compose.yml
|
43 |
+
|
44 |
+
# Ignore all local development files
|
45 |
+
.vscode/
|
46 |
+
.idea/
|
47 |
+
.pytest_cache/
|
48 |
+
|
49 |
+
# Ignore all test files
|
50 |
+
*.test.*
|
51 |
+
*.spec.*
|
52 |
+
*_test.*
|
53 |
+
*_spec.*
|
54 |
+
|
55 |
+
# Ignore all backup files
|
56 |
+
*.bak
|
57 |
+
*.swp
|
58 |
+
*.tmp
|
59 |
+
*.orig
|
60 |
+
|
61 |
+
# Ignore all documentation files
|
62 |
+
*.md
|
63 |
+
*.txt
|
64 |
+
*.rst
|
65 |
+
|
66 |
+
# Ignore all temporary files
|
67 |
+
*.tmp
|
68 |
+
*.temp
|
69 |
+
*.cache
|
70 |
+
|
71 |
+
# Ignore all user-specific files
|
72 |
+
*.user
|
73 |
+
*.prefs
|
74 |
+
*.rc
|
75 |
+
|
76 |
+
# Ignore all unnecessary directories and files
|
77 |
+
__pycache__
|
78 |
+
__pypackages__
|
79 |
+
|
80 |
+
|
81 |
+
!requirements.txt
|
82 |
+
!docker-requirements.txt
|
83 |
+
|
84 |
+
#!model_requirements.txt
|
85 |
+
#!server_requirements.txt
|
Dockerfile
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Stage 1: Build stage
FROM python:3.10-alpine AS builder

WORKDIR /app

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Install build dependencies (compilers + image-library headers for Pillow/pdf2image)
RUN apk add --no-cache \
    gcc \
    musl-dev \
    curl \
    libjpeg-turbo-dev \
    zlib-dev \
    libpng-dev

# Install Python dependencies into /root/.local via --user
COPY client-requirements.txt .
RUN pip install --no-cache-dir --user -r client-requirements.txt

# Stage 2: Final stage
FROM python:3.10-alpine

WORKDIR /app

# Set environment variables
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
# BUG FIX: console scripts installed with `pip --user` land in ~/.local/bin,
# which was never on PATH for appuser — add it so they resolve at runtime.
ENV PATH="/home/appuser/.local/bin:${PATH}"

# Install runtime dependencies only (shared libs, no -dev headers)
RUN apk add --no-cache \
    libjpeg-turbo \
    zlib \
    libpng \
    && rm -rf /var/cache/apk/*

# Copy installed Python dependencies from builder stage; Python finds
# /home/appuser/.local/lib/... as the user site once we run as appuser.
COPY --from=builder /root/.local /home/appuser/.local

# Copy the application code
COPY . .

# Create appuser and set permissions for /app, /data and the copied packages
# (BUG FIX: the COPY above happens as root, so /home/appuser must be chowned too)
RUN adduser -D appuser \
    && mkdir -p /data \
    && chown -R appuser:appuser /app /data /home/appuser

USER appuser
EXPOSE 7860

# Command to run the Gradio program
CMD ["python", "ux/discovery_demo.py"]
ux/README.md
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
UX for discovery
|
2 |
+
|
ux/discovery_demo.py
ADDED
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Standard library
import base64
import logging
from io import BytesIO

# Third-party
import gradio as gr
from openai import OpenAI  # BUG FIX: was imported twice; single import kept
from pdf2image import convert_from_path

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def encode_image(image: BytesIO) -> str:
    """Read the remaining bytes of *image* and return them base64-encoded as text."""
    raw_bytes = image.read()
    encoded = base64.b64encode(raw_bytes)
    return encoded.decode("utf-8")
20 |
+
|
21 |
+
# Dynamic LLM client based on model
def get_openai_client(model: str) -> "OpenAI":
    """Initialize an OpenAI-compatible client targeting the local server for *model*.

    Args:
        model: Model backend name; must have a configured local port.

    Returns:
        An ``OpenAI`` client whose ``base_url`` points at the model's port.

    Raises:
        ValueError: If *model* has no configured port. (BUG FIX: the old
            ``valid_models`` list also contained "sarvam-m" and "deepseek-r1",
            which passed validation and then crashed with ``KeyError`` on the
            port lookup. Validating against the port table itself keeps the
            accepted set and the configured ports in sync.)
    """
    model_ports = {
        "qwen3": "9100",
        "gemma3": "9000",
        "moondream": "7882",
        "qwen2.5vl": "7883",
    }
    if model not in model_ports:
        raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(model_ports)}")

    base_url = f"http://0.0.0.0:{model_ports[model]}/v1"

    # The local server does not verify API keys; any non-empty string works.
    return OpenAI(api_key="http", base_url=base_url)
37 |
+
|
38 |
+
|
39 |
+
def ocr_page_with_rolm(img_base64: str, model: str) -> str:
    """Perform OCR on a base64-encoded PNG image using the specified model.

    Args:
        img_base64: Base64-encoded image payload (no ``data:`` prefix).
        model: Model name understood by ``get_openai_client``.

    Returns:
        The plain text the model extracted from the image.

    Raises:
        RuntimeError: If client construction or the completion request fails.
    """
    try:
        client = get_openai_client(model)
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{img_base64}"}
                        },
                        {"type": "text", "text": "Return the plain text extracted from this image."}
                    ]
                }
            ],
            temperature=0.2,
            max_tokens=4096
        )
        return response.choices[0].message.content
    except Exception as e:
        # BUG FIX: the original `raise Exception(status_code=500, detail=...)`
        # used FastAPI's HTTPException signature; bare Exception rejects keyword
        # arguments, so every failure died with a TypeError instead of the
        # intended message. Chain the cause so the real error is preserved.
        raise RuntimeError(f"OCR processing failed: {str(e)}") from e
64 |
+
|
65 |
+
|
66 |
+
# --- PDF Processing Module ---
def process_pdf(pdf_file, page_number, prompt):
    """Extract text from one page of an uploaded PDF via the OCR backend.

    Args:
        pdf_file: Gradio ``File`` upload (has a ``.name`` path) or a path string.
        page_number: 1-based page to process.
        prompt: Custom prompt from the UI. NOTE(review): validated but not yet
            forwarded — the OCR call uses a fixed instruction; wiring it through
            requires changing ``ocr_page_with_rolm``.

    Returns:
        dict: ``{"extracted_text": ...}`` on success, ``{"error": ...}`` otherwise.
    """
    if not pdf_file:
        return {"error": "Please upload a PDF file"}
    if not prompt.strip():
        return {"error": "Please provide a non-empty prompt"}
    try:
        page_number = int(page_number)
        if page_number < 1:
            raise ValueError("Page number must be at least 1")
    except (ValueError, TypeError):
        return {"error": "Page number must be a positive integer"}
    file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file

    # BUG FIX: the original converted the *entire* PDF, OCR'd every page,
    # ignored the validated page_number, and returned only the last page's
    # text. Convert and OCR just the requested page instead.
    try:
        images = convert_from_path(file_path, first_page=page_number, last_page=page_number)
    except Exception as e:
        return {"error": f"Failed to read PDF: {str(e)}"}
    if not images:
        return {"error": f"Page {page_number} not found in the PDF"}

    image_bytes_io = BytesIO()
    images[0].save(image_bytes_io, format='JPEG')
    image_bytes_io.seek(0)  # rewind so encode_image reads from the start
    img_base64 = encode_image(image_bytes_io)
    text = ocr_page_with_rolm(img_base64, model="gemma3")
    logger.debug(text)
    return {"extracted_text": text}
|
96 |
+
|
97 |
+
# --- Gradio Interface ---
# Page-level CSS: cap the overall width; the #chatbot / #conversations rules
# target elements that do not exist in this layout yet — presumably reserved
# for a future chat tab (TODO confirm).
css = """
.gradio-container {
    max-width: 1200px;
    margin: auto;
}
#chatbot {
    height: calc(100vh - 200px);
    max-height: 800px;
}
#conversations {
    max-height: 600px;
    overflow-y: auto;
}
"""


# Declarative UI: one "PDF Query" tab with inputs on the left
# (file, page number, prompt) and the JSON result on the right.
with gr.Blocks(title="dwani.ai - Discovery", css=css, fill_width=True) as demo:
    gr.Markdown("# Document Analytics")

    with gr.Tabs():

        # PDF Query Tab
        with gr.Tab("PDF Query"):
            gr.Markdown("Query PDF files with a custom prompt")
            with gr.Row():
                with gr.Column():
                    pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
                    # precision=0 keeps the widget integer-valued; process_pdf
                    # still re-validates server-side.
                    pdf_page = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
                    pdf_prompt = gr.Textbox(
                        label="Custom Prompt",
                        placeholder="e.g., List the key points",
                        value="List the key points",
                        lines=3
                    )
                    pdf_submit = gr.Button("Process")
                with gr.Column():
                    pdf_output = gr.JSON(label="PDF Response")
            # Wire the button to the processing function; output dict renders
            # directly in the JSON component.
            pdf_submit.click(
                fn=process_pdf,
                inputs=[pdf_input, pdf_page, pdf_prompt],
                outputs=pdf_output
            )

# Launch the interface
if __name__ == "__main__":
    try:
        # Bind to all interfaces on port 7860 (the port the container exposes).
        demo.launch(server_name="0.0.0.0", server_port=7860)
    except Exception as e:
        # Top-level boundary: log and echo the failure rather than crash silently.
        logger.error(f"Failed to launch Gradio interface: {str(e)}")
        print(f"Failed to launch Gradio interface: {str(e)}")