sachin committed on
Commit 5cd593d · 1 Parent(s): 93119e7
Files changed (4)
  1. .dockerignore +86 -0
  2. Dockerfile +55 -0
  3. ux/README.md +2 -0
  4. ux/discovery_demo.py +143 -0
.dockerignore ADDED
@@ -0,0 +1,86 @@
+ samples
+ *.wav
+ *.log
+ venv
+ *.nemo
+
+ docs
+ misc
+ samples
+ # Ignore all Python files except those explicitly copied
+ *.pyc
+ *.pyo
+ *.pyd
+
+ # Ignore all virtual environments
+ venv/
+ env/
+ .env/
+ .venv/
+ __pycache__/
+
+ # Ignore build artifacts
+ build/
+ dist/
+ *.egg-info/
+
+ # Ignore local version control files
+ .git/
+ .gitignore
+
+ # Ignore local environment files
+ .env
+
+ # Ignore local log files
+ *.log
+
+ # Ignore all node_modules
+ node_modules/
+
+ # Ignore all Docker-related files
+ Dockerfile
+ docker-compose.yml
+
+ # Ignore all local development files
+ .vscode/
+ .idea/
+ .pytest_cache/
+
+ # Ignore all test files
+ *.test.*
+ *.spec.*
+ *_test.*
+ *_spec.*
+
+ # Ignore all backup files
+ *.bak
+ *.swp
+ *.tmp
+ *.orig
+
+ # Ignore all documentation files
+ *.md
+ *.txt
+ *.rst
+
+ # Ignore all temporary files
+ *.tmp
+ *.temp
+ *.cache
+
+ # Ignore all user-specific files
+ *.user
+ *.prefs
+ *.rc
+
+ # Ignore all unnecessary directories and files
+ __pycache__
+ __pypackages__
+
+
+ !requirements.txt
+ !docker-requirements.txt
+ !client-requirements.txt
+
+ #!model_requirements.txt
+ #!server_requirements.txt
Dockerfile ADDED
@@ -0,0 +1,55 @@
+ # Stage 1: Build stage
+ FROM python:3.10-alpine AS builder
+
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Install build dependencies
+ RUN apk add --no-cache \
+     gcc \
+     musl-dev \
+     curl \
+     libjpeg-turbo-dev \
+     zlib-dev \
+     libpng-dev
+
+ # Install Python dependencies
+ COPY client-requirements.txt .
+ RUN pip install --no-cache-dir --user -r client-requirements.txt
+
+ # Stage 2: Final stage
+ FROM python:3.10-alpine
+
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONDONTWRITEBYTECODE=1
+ ENV PYTHONUNBUFFERED=1
+
+ # Install runtime dependencies only (poppler-utils provides pdftoppm, which pdf2image needs)
+ RUN apk add --no-cache \
+     libjpeg-turbo \
+     zlib \
+     libpng \
+     poppler-utils \
+     && rm -rf /var/cache/apk/*
+
+ # Copy installed Python dependencies from builder stage
+ COPY --from=builder /root/.local /home/appuser/.local
+
+ # Copy the application code
+ COPY . .
+
+ # Create appuser and set permissions for /app and /data
+ RUN adduser -D appuser \
+     && mkdir -p /data \
+     && chown -R appuser:appuser /app /data
+
+ USER appuser
+ EXPOSE 7860
+
+ # Command to run the Gradio program
+ CMD ["python", "ux/discovery_demo.py"]
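The final image runs the Gradio app as the unprivileged appuser and exposes port 7860. A minimal smoke test for a running container, assuming the image was built from this Dockerfile and started with the container port published to the host (for example: docker build -t discovery-ux . followed by docker run -p 7860:7860 discovery-ux; the image tag is just an illustrative name):

import urllib.request

# Probe the published Gradio port; adjust the URL if the port mapping differs.
with urllib.request.urlopen("http://localhost:7860", timeout=10) as resp:
    print("Gradio app responded with HTTP status", resp.status)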
ux/README.md ADDED
@@ -0,0 +1,2 @@
+ UX for discovery
+
ux/discovery_demo.py ADDED
@@ -0,0 +1,143 @@
+ import base64
+ import logging
+ from io import BytesIO
+
+ import gradio as gr
+ from openai import OpenAI
+ from pdf2image import convert_from_path
+
+ # Set up logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ def encode_image(image: BytesIO) -> str:
+     """Encode image bytes to a base64 string."""
+     return base64.b64encode(image.read()).decode("utf-8")
+
+
+ # Dynamic LLM client based on model
+ def get_openai_client(model: str) -> OpenAI:
+     """Initialize an OpenAI-compatible client with a model-specific base URL."""
+     valid_models = ["gemma3", "moondream", "qwen2.5vl", "qwen3", "sarvam-m", "deepseek-r1"]
+     if model not in valid_models:
+         raise ValueError(f"Invalid model: {model}. Choose from: {', '.join(valid_models)}")
+
+     model_ports = {
+         "qwen3": "9100",
+         "gemma3": "9000",
+         "moondream": "7882",
+         "qwen2.5vl": "7883",
+     }
+     if model not in model_ports:
+         raise ValueError(f"No local endpoint is configured for model: {model}")
+     base_url = f"http://0.0.0.0:{model_ports[model]}/v1"
+
+     return OpenAI(api_key="http", base_url=base_url)
+
+
+ def ocr_page_with_rolm(img_base64: str, model: str, prompt: str = "Return the plain text extracted from this image.") -> str:
+     """Perform OCR on the provided base64 image using the specified model."""
+     try:
+         client = get_openai_client(model)
+         response = client.chat.completions.create(
+             model=model,
+             messages=[
+                 {
+                     "role": "user",
+                     "content": [
+                         {
+                             "type": "image_url",
+                             "image_url": {"url": f"data:image/png;base64,{img_base64}"}
+                         },
+                         {"type": "text", "text": prompt}
+                     ]
+                 }
+             ],
+             temperature=0.2,
+             max_tokens=4096
+         )
+         return response.choices[0].message.content
+     except Exception as e:
+         raise RuntimeError(f"OCR processing failed: {str(e)}") from e
+
+
+ # --- PDF Processing Module ---
+ def process_pdf(pdf_file, page_number, prompt):
+     if not pdf_file:
+         return {"error": "Please upload a PDF file"}
+     if not prompt.strip():
+         return {"error": "Please provide a non-empty prompt"}
+     try:
+         page_number = int(page_number)
+         if page_number < 1:
+             raise ValueError("Page number must be at least 1")
+     except (ValueError, TypeError):
+         return {"error": "Page number must be a positive integer"}
+     file_path = pdf_file.name if hasattr(pdf_file, 'name') else pdf_file
+
+     # Convert only the requested page of the uploaded PDF
+     images = convert_from_path(file_path, first_page=page_number, last_page=page_number)
+     if not images:
+         return {"error": f"Page {page_number} not found in the PDF"}
+
+     # Save the page as PNG to match the data URL declared in ocr_page_with_rolm
+     image_bytes_io = BytesIO()
+     images[0].save(image_bytes_io, format='PNG')
+     image_bytes_io.seek(0)  # Reset cursor to the start before reading
+     img_base64 = encode_image(image_bytes_io)
+     text = ocr_page_with_rolm(img_base64, model="gemma3", prompt=prompt)
+     logger.debug(text)
+     return {"extracted_text": text}
+
+
+ # --- Gradio Interface ---
+ css = """
+ .gradio-container {
+     max-width: 1200px;
+     margin: auto;
+ }
+ #chatbot {
+     height: calc(100vh - 200px);
+     max-height: 800px;
+ }
+ #conversations {
+     max-height: 600px;
+     overflow-y: auto;
+ }
+ """
+
+ with gr.Blocks(title="dwani.ai - Discovery", css=css, fill_width=True) as demo:
+     gr.Markdown("# Document Analytics")
+
+     with gr.Tabs():
+
+         # PDF Query Tab
+         with gr.Tab("PDF Query"):
+             gr.Markdown("Query PDF files with a custom prompt")
+             with gr.Row():
+                 with gr.Column():
+                     pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"])
+                     pdf_page = gr.Number(label="Page Number", value=1, minimum=1, precision=0)
+                     pdf_prompt = gr.Textbox(
+                         label="Custom Prompt",
+                         placeholder="e.g., List the key points",
+                         value="List the key points",
+                         lines=3
+                     )
+                     pdf_submit = gr.Button("Process")
+                 with gr.Column():
+                     pdf_output = gr.JSON(label="PDF Response")
+             pdf_submit.click(
+                 fn=process_pdf,
+                 inputs=[pdf_input, pdf_page, pdf_prompt],
+                 outputs=pdf_output
+             )
+
+ # Launch the interface
+ if __name__ == "__main__":
+     try:
+         demo.launch(server_name="0.0.0.0", server_port=7860)
+     except Exception as e:
+         logger.error(f"Failed to launch Gradio interface: {str(e)}")
+         print(f"Failed to launch Gradio interface: {str(e)}")
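The OCR path in ux/discovery_demo.py can also be exercised without the Gradio UI. Below is a minimal sketch that sends one image through the same OpenAI-compatible chat call, assuming a gemma3 server is reachable on port 9000 (the port configured in get_openai_client) and using a hypothetical test image named sample_page.png:

import base64

from openai import OpenAI

# sample_page.png is a placeholder test image; any PNG page scan will do.
with open("sample_page.png", "rb") as f:
    img_base64 = base64.b64encode(f.read()).decode("utf-8")

# Same endpoint convention as get_openai_client: gemma3 is served on port 9000.
client = OpenAI(api_key="http", base_url="http://0.0.0.0:9000/v1")
response = client.chat.completions.create(
    model="gemma3",
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img_base64}"}},
                {"type": "text", "text": "Return the plain text extracted from this image."},
            ],
        }
    ],
    temperature=0.2,
    max_tokens=4096,
)
print(response.choices[0].message.content)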