sachin committed on
Commit 5cd593d · 1 Parent(s): 93119e7
Files changed (4)
  1. .dockerignore +86 -0
  2. Dockerfile +55 -0
  3. ux/README.md +2 -0
  4. ux/discovery_demo.py +143 -0
.dockerignore ADDED
@@ -0,0 +1,86 @@
+ samples
+ *.wav
+ *.log
+ venv
+ *.nemo
+
+ docs
+ misc
+ samples
+ # Ignore all Python files except those explicitly copied
+ *.pyc
+ *.pyo
+ *.pyd
+
+ # Ignore all virtual environments
+ venv/
+ env/
+ .env/
+ .venv/
+ __pycache__/
+
+ # Ignore build artifacts
+ build/
+ dist/
+ *.egg-info/
+
+ # Ignore local version control files
+ .git/
+ .gitignore
+
+ # Ignore local environment files
+ .env
+
+ # Ignore local log files
+ *.log
+
+ # Ignore all node_modules
+ node_modules/
+
+ # Ignore all Docker-related files
+ Dockerfile
+ docker-compose.yml
+
+ # Ignore all local development files
+ .vscode/
+ .idea/
+ .pytest_cache/
+
+ # Ignore all test files
+ *.test.*
+ *.spec.*
+ *_test.*
+ *_spec.*
+
+ # Ignore all backup files
+ *.bak
+ *.swp
+ *.tmp
+ *.orig
+
+ # Ignore all documentation files
+ *.md
+ *.txt
+ *.rst
+
+ # Ignore all temporary files
+ *.tmp
+ *.temp
+ *.cache
+
+ # Ignore all user-specific files
+ *.user
+ *.prefs
+ *.rc
+
+ # Ignore all unnecessary directories and files
+ __pycache__
+ __pypackages__
+
+
+ !requirements.txt
+ !docker-requirements.txt
+ !client-requirements.txt
+
+ #!model_requirements.txt
+ #!server_requirements.txt
Dockerfile ADDED
@@ -0,0 +1,55 @@
+ # Stage 1: Build stage
+ FROM python:3.10-alpine AS builder
+
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Install build dependencies
+ RUN apk add --no-cache \
+     gcc \
+     musl-dev \
+     curl \
+     libjpeg-turbo-dev \
+     zlib-dev \
+     libpng-dev
+
+ # Install Python dependencies
+ COPY client-requirements.txt .
+ RUN pip install --no-cache-dir --user -r client-requirements.txt
+
+ # Stage 2: Final stage
+ FROM python:3.10-alpine
+
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Install runtime dependencies only (poppler-utils provides pdftoppm, which pdf2image needs)
+ RUN apk add --no-cache \
+     libjpeg-turbo \
+     zlib \
+     libpng \
+     poppler-utils \
+     && rm -rf /var/cache/apk/*
+
+ # Copy installed Python dependencies from builder stage
+ COPY --from=builder /root/.local /home/appuser/.local
+
+ # Copy the application code
+ COPY . .
+
+ # Create appuser and set permissions for /app and /data
+ RUN adduser -D appuser \
+     && mkdir -p /data \
+     && chown -R appuser:appuser /app /data
+
+ USER appuser
+ EXPOSE 7860
+
+ # Command to run the Gradio program
+ CMD ["python", "ux/discovery_demo.py"]
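The final image runs the Gradio app as the unprivileged appuser and exposes port 7860. A minimal smoke test for a running container, assuming the image was built from this Dockerfile and started with the container port published to the host (for example: docker build -t discovery-ux . followed by docker run -p 7860:7860 discovery-ux; the image tag is just an illustrative name):

import urllib.request

# Probe the published Gradio port; adjust the URL if the port mapping differs.
with urllib.request.urlopen("http://localhost:7860", timeout=10) as resp:
    print("Gradio app responded with HTTP status", resp.status)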
ux/README.md ADDED
@@ -0,0 +1,2 @@
+ UX for discovery
+
ux/discovery_demo.py ADDED
@@ -0,0 +1,143 @@
+ import base64
+ import logging
+ from io import BytesIO
+
+ import gradio as gr
+ from openai import OpenAI
+ from pdf2image import convert_from_path
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ def encode_image(image: BytesIO) -> str:
+     """Encode image bytes to a base64 string."""
+     return base64.b64encode(image.read()).decode("utf-8")
+
+
+ # Dynamic LLM client based on model
+ def get_openai_client(model: str) -> OpenAI:
+     """Initialize an OpenAI-compatible client with a model-specific base URL."""
+     valid_models = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
+     if model not in valid_models:
+         raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(valid_models)}")
+
+     model_ports = {
+         "qwen3": "9100",
+         "gemma3": "9000",
+         "moondream": "7882",
+         "qwen2.5vl": "7883",
+     }
+     if model not in model_ports:
+         raise ValueError(f"No local endpoint is configured for model: {model}")
+     base_url = f"http://0.0.0.0:{model_ports[model]}/v1"
+
+     return OpenAI(api_key="http", base_url=base_url)
+
+
+ def ocr_page_with_rolm(img_base64: str, model: str, prompt: str = "Return the plain text extracted from this image.") -> str:
+     """Perform OCR on the provided base64 image using the specified model."""
+     try:
+         client = get_openai_client(model)
+         response = client.chat.completions.create(
+             model=model,
+             messages=[
+                 {
+                     "role": "user",
+                     "content": [
+                         {
+                             "type": "image_url",
+                             "image_url": {"url": f"data:image/png;base64,{img_base64}"}
+                         },
+                         {"type": "text", "text": prompt}
+                     ]
+                 }
+             ],
+             temperature=0.2,
+             max_tokens=4096
+         )
+         return response.choices[0].message.content
+     except Exception as e:
+         raise RuntimeError(f"OCR processing failed: {str(e)}") from e
+
+
+ # --- PDF Processing Module ---
+ def process_pdf(pdf_file, page_number, prompt):
+     if not pdf_file:
+         return {"error": "Please upload a PDF file"}
+     if not prompt.strip():
+         return {"error": "Please provide a non-empty prompt"}
+     try:
+         page_number = int(page_number)
+         if page_number < 1:
+             raise ValueError("Page number must be at least 1")
+     except (ValueError, TypeError):
+         return {"error": "Page number must be a positive integer"}
+     file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
+
+     # Convert only the requested page of the uploaded PDF
+     images = convert_from_path(file_path, first_page=page_number, last_page=page_number)
+     if not images:
+         return {"error": f"Page {page_number} not found in the PDF"}
+
+     # Save the page as PNG to match the data URL declared in ocr_page_with_rolm
+     image_bytes_io = BytesIO()
+     images[0].save(image_bytes_io, format='PNG')
+     image_bytes_io.seek(0)  # Reset cursor to the start before reading
+     img_base64 = encode_image(image_bytes_io)
+     text = ocr_page_with_rolm(img_base64, model="gemma3", prompt=prompt)
+     logger.debug(text)
+     return {"extracted_text": text}
+
+
+ # --- Gradio Interface ---
+ css = """
+ .gradio-container {
+     max-width: 1200px;
+     margin: auto;
+ }
+ #chatbot {
+     height: calc(100vh - 200px);
+     max-height: 800px;
+ }
+ #conversations {
+     max-height: 600px;
+     overflow-y: auto;
+ }
+ """
+
+ with gr.Blocks(title="dwani.ai - Discovery", css=css, fill_width=True) as demo:
+     gr.Markdown("# Document Analytics")
+
+     with gr.Tabs():
+
+         # PDF Query Tab
+         with gr.Tab("PDF Query"):
+             gr.Markdown("Query PDF files with a custom prompt")
+             with gr.Row():
+                 with gr.Column():
+                     pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+                     pdf_page = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
+                     pdf_prompt = gr.Textbox(
+                         label="Custom Prompt",
+                         placeholder="e.g., List the key points",
+                         value="List the key points",
+                         lines=3
+                     )
+                     pdf_submit = gr.Button("Process")
+                 with gr.Column():
+                     pdf_output = gr.JSON(label="PDF Response")
+             pdf_submit.click(
+                 fn=process_pdf,
+                 inputs=[pdf_input, pdf_page, pdf_prompt],
+                 outputs=pdf_output
+             )
+
+ # Launch the interface
+ if __name__ == "__main__":
+     try:
+         demo.launch(server_name="0.0.0.0", server_port=7860)
+     except Exception as e:
+         logger.error(f"Failed to launch Gradio interface: {str(e)}")
+         print(f"Failed to launch Gradio interface: {str(e)}")
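The OCR path in ux/discovery_demo.py can also be exercised without the Gradio UI. Below is a minimal sketch that sends one image through the same OpenAI-compatible chat call, assuming a gemma3 server is reachable on port 9000 (the port configured in get_openai_client) and using a hypothetical test image named sample_page.png:

import base64

from openai import OpenAI

# sample_page.png is a placeholder test image; any PNG page scan will do.
with open("sample_page.png", "rb") as f:
    img_base64 = base64.b64encode(f.read()).decode("utf-8")

# Same endpoint convention as get_openai_client: gemma3 is served on port 9000.
client = OpenAI(api_key="http", base_url="http://0.0.0.0:9000/v1")
response = client.chat.completions.create(
    model="gemma3",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
                {"type": "text", "text": "Return the plain text extracted from this image."},
            ],
        }
    ],
    temperature=0.2,
    max_tokens=4096,
)
print(response.choices[0].message.content)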