liamcripwell committed on
Commit
ee86994
·
1 Parent(s): 299f86a

initial content push

Browse files
Files changed (5) hide show
  1. .gitignore +9 -0
  2. Dockerfile +47 -0
  3. app.py +93 -0
  4. numarkdown.svg +78 -0
  5. start.sh +36 -0
.gitignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .env
6
+ .venv/
7
+ venv/
8
+ .DS_Store
9
+ *.log
Dockerfile ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# CUDA 12.1 runtime base image — required by vLLM for GPU inference.
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

# System dependencies (as root)
RUN apt-get update && apt-get install -y \
    git \
    curl \
    wget \
    python3 \
    python3-pip \
    python3-dev \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Set Python aliases (as root) so `python` / `pip` resolve to python3 / pip3
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN ln -sf /usr/bin/pip3 /usr/bin/pip

# Install system-level Python packages (as root)
RUN pip install --upgrade pip

# Set up a new user named "user" with user ID 1000
# NOTE(review): UID 1000 is presumably the Hugging Face Spaces convention — confirm
RUN useradd -m -u 1000 user

# Switch to the "user" user; later pip installs go to ~/.local
USER user

# Set home to the user's home directory and put user-installed
# executables (~/.local/bin) ahead on PATH
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Install Python packages as user (goes to ~/.local/bin)
RUN pip install --no-cache-dir --user "vllm[triton]"
RUN pip install --no-cache-dir --user gradio Pillow requests aiohttp

# Copy files with correct ownership (image assets, app code, launcher)
COPY --chown=user numarkdown.svg $HOME/app/
COPY --chown=user app.py $HOME/app/
COPY --chown=user start.sh $HOME/app/
RUN chmod +x $HOME/app/start.sh

# Expose the Space UI port (Gradio serves on 7860)
EXPOSE 7860

# start.sh boots the vLLM server in the background, waits for it,
# then launches the Gradio front end.
CMD ["./start.sh"]
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import base64
4
+ from PIL import Image
5
+ from io import BytesIO
6
+
7
def encode_image_to_base64(image: "Image.Image") -> str:
    """Encode a PIL image as a JPEG data URI for an OpenAI-style image_url payload.

    Args:
        image: Source PIL image. Images with an alpha channel or palette
            (RGBA/LA/P) are converted to RGB first — JPEG cannot store
            alpha and ``save(..., format="JPEG")`` would raise ``OSError``.

    Returns:
        A ``data:image/jpeg;base64,...`` string.
    """
    # Bug fix: flatten non-RGB modes; saving RGBA/P/LA directly as JPEG fails.
    if image.mode not in ("RGB", "L"):
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"
12
+
13
def query_vllm_api(image, temperature, max_tokens=12_000):
    """Send an image to the local vLLM OpenAI-compatible endpoint and parse the reply.

    Args:
        image: PIL image to OCR, or ``None`` (sends an empty message list).
        temperature: Sampling temperature forwarded to the chat API.
        max_tokens: Generation cap (default 12k to bound Space latency).

    Returns:
        A 3-tuple ``(reasoning, answer, answer)`` matching the Gradio
        outputs ``[thinking, raw_answer, output]``. On failure, the same
        error message fills all three slots.
    """
    messages = []
    if image is not None:
        # Downsize large uploads to keep the base64 payload manageable.
        max_size = 1024
        if max(image.size) > max_size:
            ratio = max_size / max(image.size)
            new_size = tuple(int(dim * ratio) for dim in image.size)
            image = image.resize(new_size, Image.Resampling.LANCZOS)

        image_b64 = encode_image_to_base64(image)
        messages.append({
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}}
            ]
        })

    payload = {
        "model": "numind/NuMarkdown-8B-Thinking",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    try:
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            # Bug fix: 60 s was far too short for a reasoning model that may
            # emit up to 12k tokens; generation regularly exceeds a minute.
            timeout=300
        )
        response.raise_for_status()
        data = response.json()

        result = data["choices"][0]["message"]["content"]
        # Bug fix: guard tag extraction — unexpected model output previously
        # raised an unhandled IndexError instead of surfacing in the UI.
        try:
            reasoning = result.split("<think>")[1].split("</think>")[0]
            answer = result.split("<answer>")[1].split("</answer>")[0]
        except IndexError:
            return ("Could not parse <think>/<answer> tags from model output.",
                    result, result)

        return reasoning, answer, answer
    except requests.exceptions.RequestException as e:
        # Bug fix: must return 3 values to match the Gradio outputs list;
        # returning a single string made the click handler itself error.
        err = f"API request failed: {e}"
        return err, err, err
54
+
55
# --- Gradio UI: single-page demo wired to the local vLLM server ---
with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
    # Clean banner with centered content (static HTML, no event wiring)
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">๐Ÿ‘๏ธ NuMarkdown-8B-Thinking</h1>
        <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
        <div style="margin-top: 15px;">
            <a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">๐Ÿ–ฅ๏ธ API / Platform</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">๐Ÿ—ฃ๏ธ Discord</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">๐Ÿ”— GitHub</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">๐Ÿค— Model</a>
        </div>
    </div>

    <p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
    <p>NOTE: In this space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
    """)

    with gr.Row():
        # Left column: inputs (sampling temperature, image, trigger button)
        with gr.Column():
            temperature = gr.Slider(0.1, 1.5, value=0.6, step=0.1, label="Temperature")
            img_in = gr.Image(type="pil", label="Upload Image")
            btn = gr.Button("Generate Response")
        # Right column: the three outputs produced by query_vllm_api
        with gr.Column():
            thinking = gr.Textbox(label="Thinking Trace", lines=10)
            raw_answer = gr.Textbox(label="Raw Output", lines=5)
            output = gr.Markdown(label="Response")

    # Wire the button: query_vllm_api returns a 3-tuple matching these outputs.
    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )

if __name__ == "__main__":
    # share=True requests a public gradio.live tunnel in addition to the
    # local server — presumably redundant inside a HF Space; confirm.
    demo.launch(share=True)
numarkdown.svg ADDED
start.sh ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Container entry point: boot the vLLM OpenAI-compatible server in the
# background, wait until it answers, then run the Gradio UI in the foreground.

LOG_FILE="$HOME/app/vllm.log"

echo "Starting vLLM server..."
# Start vLLM in the background with logging
python3 -m vllm.entrypoints.openai.api_server \
    --model numind/NuMarkdown-8B-Thinking \
    --port 8000 \
    --host 0.0.0.0 \
    --max-model-len 20000 \
    --gpu-memory-utilization 0.6 > "$LOG_FILE" 2>&1 &

VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"

# Wait for vLLM to be ready (up to 90 * 2s = 3 minutes)
echo "Waiting for vLLM server to start..."
for i in {1..90}; do
    if curl -s http://localhost:8000/v1/models > /dev/null; then
        echo "vLLM server is ready!"
        break
    fi
    # Bug fix: stop polling early if the server process already died,
    # instead of waiting out the full 3 minutes for nothing.
    if ! kill -0 "$VLLM_PID" 2>/dev/null; then
        echo "ERROR: vLLM process exited during startup!"
        break
    fi
    echo "Waiting... ($i/90)"
    sleep 2
done

# Final health check: fail fast (with logs) if the API never came up
if ! curl -s http://localhost:8000/v1/models > /dev/null; then
    echo "ERROR: vLLM server failed to start!"
    echo "vLLM logs:"
    cat "$LOG_FILE"
    exit 1
fi

echo "Starting Gradio app..."
# Bug fix: exec so the Gradio app replaces this shell as the container's
# main process and receives SIGTERM directly on shutdown.
exec python3 "$HOME/app/app.py"