Spaces: Running on L40S
Commit ee86994 · Parent: 299f86a
initial content push

Files changed:
- .gitignore +9 -0
- Dockerfile +47 -0
- app.py +93 -0
- numarkdown.svg +78 -0
- start.sh +36 -0
.gitignore
ADDED
@@ -0,0 +1,9 @@
__pycache__/
*.pyc
*.pyo
*.pyd
.env
.venv/
venv/
.DS_Store
*.log
Dockerfile
ADDED
@@ -0,0 +1,47 @@
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04

# System dependencies (as root)
RUN apt-get update && apt-get install -y \
    git \
    curl \
    wget \
    python3 \
    python3-pip \
    python3-dev \
    libgl1 \
    && rm -rf /var/lib/apt/lists/*

# Set Python aliases (as root)
RUN ln -s /usr/bin/python3 /usr/bin/python
RUN ln -sf /usr/bin/pip3 /usr/bin/pip

# Install system-level Python packages (as root)
RUN pip install --upgrade pip

# Set up a new user named "user" with user ID 1000
RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Set the working directory to the user's home directory
WORKDIR $HOME/app

# Install Python packages as user (goes to ~/.local/bin)
RUN pip install --no-cache-dir --user "vllm[triton]"
RUN pip install --no-cache-dir --user gradio Pillow requests aiohttp

# Copy files with correct ownership
COPY --chown=user numarkdown.svg $HOME/app/
COPY --chown=user app.py $HOME/app/
COPY --chown=user start.sh $HOME/app/
RUN chmod +x $HOME/app/start.sh

# Expose the Space UI port
EXPOSE 7860

CMD ["./start.sh"]
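For local testing, the image behaves like any other CUDA container; a minimal sketch of building and running it, assuming Docker with the NVIDIA container toolkit is installed and using a hypothetical tag numarkdown-space:

# Build the Space image (the tag name is arbitrary)
docker build -t numarkdown-space .

# Run with GPU access, exposing the Gradio UI on the port the Dockerfile exposes
docker run --rm --gpus all -p 7860:7860 numarkdown-space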
app.py
ADDED
@@ -0,0 +1,93 @@
import gradio as gr
import requests
import base64
from PIL import Image
from io import BytesIO

def encode_image_to_base64(image: Image.Image) -> str:
    # JPEG has no alpha channel, so normalize to RGB before saving
    if image.mode != "RGB":
        image = image.convert("RGB")
    buffered = BytesIO()
    image.save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode()
    return f"data:image/jpeg;base64,{img_str}"

def query_vllm_api(image, temperature, max_tokens=12_000):
    messages = []
    if image is not None:
        # Optional: resize large images to avoid huge uploads
        max_size = 1024
        if max(image.size) > max_size:
            ratio = max_size / max(image.size)
            new_size = tuple(int(dim * ratio) for dim in image.size)
            image = image.resize(new_size, Image.Resampling.LANCZOS)

        image_b64 = encode_image_to_base64(image)
        messages.append({
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": image_b64}}
            ]
        })

    payload = {
        "model": "numind/NuMarkdown-8B-Thinking",
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature
    }

    try:
        response = requests.post(
            "http://localhost:8000/v1/chat/completions",
            json=payload,
            timeout=60
        )
        response.raise_for_status()
        data = response.json()

        result = data["choices"][0]["message"]["content"]
        reasoning = result.split("<think>")[1].split("</think>")[0]
        answer = result.split("<answer>")[1].split("</answer>")[0]

        return reasoning, answer, answer
    except requests.exceptions.RequestException as e:
        # The click handler has three outputs, so report the error to all three
        error = f"API request failed: {e}"
        return error, error, error

with gr.Blocks(title="NuMarkdown-8B-Thinking", theme=gr.themes.Soft()) as demo:
    # Clean banner with centered content
    gr.HTML("""
    <div style="text-align: center; padding: 20px; background: linear-gradient(90deg, #667eea 0%, #764ba2 100%); border-radius: 10px; margin-bottom: 20px;">
        <h1 style="color: white; margin: 0; font-size: 2.5em; font-weight: bold;">🖋️ NuMarkdown-8B-Thinking</h1>
        <p style="color: rgba(255,255,255,0.9); margin: 10px 0; font-size: 1.2em;">Upload an image to convert to Markdown!</p>
        <div style="margin-top: 15px;">
            <a href="https://nuextract.ai/" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🖥️ API / Platform</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://discord.gg/3tsEtJNCDe" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🗣️ Discord</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://github.com/numindai/NuMarkdown" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🐙 GitHub</a>
            <span style="color: rgba(255,255,255,0.7);">|</span>
            <a href="https://huggingface.co/numind/NuMarkdown-8B-Thinking" style="color: white; text-decoration: none; margin: 0 10px; font-weight: 500;">🤗 Model</a>
        </div>
    </div>

    <p>NuMarkdown-8B-Thinking is the first reasoning OCR VLM. It is specifically trained to convert documents into clean Markdown files, well suited for RAG applications. It generates thinking tokens to figure out the layout of the document before generating the Markdown file. It is particularly good at understanding documents with weird layouts and complex tables.</p>
    <p>NOTE: In this Space we downsize large images and restrict the maximum output of the model, so performance could improve if you run the model yourself.</p>
    """)

    with gr.Row():
        with gr.Column():
            temperature = gr.Slider(0.1, 1.5, value=0.6, step=0.1, label="Temperature")
            img_in = gr.Image(type="pil", label="Upload Image")
            btn = gr.Button("Generate Response")
        with gr.Column():
            thinking = gr.Textbox(label="Thinking Trace", lines=10)
            raw_answer = gr.Textbox(label="Raw Output", lines=5)
            output = gr.Markdown(label="Response")

    btn.click(
        query_vllm_api,
        inputs=[img_in, temperature],
        outputs=[thinking, raw_answer, output],
    )

if __name__ == "__main__":
    demo.launch(share=True)
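The Gradio callback above is a thin client over vLLM's OpenAI-compatible endpoint, so the same request can be sent directly once the server is up; a minimal sketch with curl, assuming GNU base64 (-w0) and a hypothetical input file page.jpg:

# Encode the image as a data URL and post it to the chat completions endpoint
IMG_B64=$(base64 -w0 page.jpg)
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "{
    \"model\": \"numind/NuMarkdown-8B-Thinking\",
    \"messages\": [{
      \"role\": \"user\",
      \"content\": [{\"type\": \"image_url\",
                     \"image_url\": {\"url\": \"data:image/jpeg;base64,${IMG_B64}\"}}]
    }],
    \"max_tokens\": 12000,
    \"temperature\": 0.6
  }"

The returned message content carries the <think>...</think> trace followed by the <answer>...</answer> Markdown, which query_vllm_api splits into the two output boxes.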
numarkdown.svg
ADDED
start.sh
ADDED
@@ -0,0 +1,36 @@
#!/bin/bash

echo "Starting vLLM server..."
# Start vLLM in the background with logging
python3 -m vllm.entrypoints.openai.api_server \
    --model numind/NuMarkdown-8B-Thinking \
    --port 8000 \
    --host 0.0.0.0 \
    --max-model-len 20000 \
    --gpu-memory-utilization 0.6 > $HOME/app/vllm.log 2>&1 &

VLLM_PID=$!
echo "vLLM started with PID: $VLLM_PID"

# Wait for vLLM to be ready
echo "Waiting for vLLM server to start..."
for i in {1..90}; do
    if curl -s http://localhost:8000/v1/models > /dev/null; then
        echo "vLLM server is ready!"
        break
    fi
    echo "Waiting... ($i/90)"
    sleep 2
done

# Check if vLLM is actually running
if ! curl -s http://localhost:8000/v1/models > /dev/null; then
    echo "ERROR: vLLM server failed to start!"
    echo "vLLM logs:"
    cat $HOME/app/vllm.log
    exit 1
fi

echo "Starting Gradio app..."
# Start the Gradio app in the foreground
python3 $HOME/app/app.py
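If the readiness loop above times out, the vLLM log usually says why (model weights still downloading, CUDA out of memory, and so on). A minimal debugging sketch from a shell inside the container; the /health route is the liveness probe recent vLLM releases expose, so treat its availability as an assumption for your version:

# Liveness probe: returns HTTP 200 once the engine is up (assumes vLLM's /health route)
curl -i http://localhost:8000/health

# Confirm which model the server is actually serving
curl -s http://localhost:8000/v1/models

# Watch the server log while the model loads (run this in a separate shell)
tail -f $HOME/app/vllm.log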