ebi committed
Commit · a2c2132 · Parent(s): 27c8941
- Dockerfile +29 -0
- README.md +7 -5
- app.py +54 -0
Dockerfile
ADDED
@@ -0,0 +1,29 @@
+FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
+
+# 1. System packages
+RUN apt-get update && apt-get install -y \
+    python3 python3-pip python3-dev build-essential ninja-build git \
+    && rm -rf /var/lib/apt/lists/*
+
+# 2. Upgrade pip
+RUN pip install --upgrade pip
+
+# 3. Install GPU-compatible Torch from the official cu121 wheel index
+RUN pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cu121
+
+# 4. Install flash_attn (its build imports torch, so skip build isolation)
+RUN pip install packaging && pip install flash-attn --no-build-isolation
+
+# 5. Install other Python libraries you need
+RUN pip install transformers gradio
+
+# 6. Copy your application code into the container
+WORKDIR /app
+COPY . /app
+
+# 7. Expose port 7860 (Gradio default)
+EXPOSE 7860
+
+# 8. Launch your app
+CMD ["python3", "app.py"]
+
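To try the image locally, something like `docker build -t deepseek-gradio .` followed by `docker run --gpus all -p 7860:7860 deepseek-gradio` should work, assuming the NVIDIA Container Toolkit is installed on the host; the tag `deepseek-gradio` is just a placeholder, not anything defined by this commit.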
README.md
CHANGED
@@ -1,10 +1,12 @@
 ---
-title: Autotrain
-emoji:
-colorFrom:
+title: Autotrain Deepseek
+emoji: 💬
+colorFrom: yellow
 colorTo: purple
-sdk:
+sdk: gradio
+sdk_version: 5.0.1
+app_file: app.py
 pinned: false
 ---
 
-
+An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
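Note: on Hugging Face Spaces, `sdk: gradio` in the README front matter runs `app.py` under the managed Gradio runtime, and a custom Dockerfile only takes effect with `sdk: docker`; as configured here, the Dockerfile above would be ignored by the Space.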
app.py
ADDED
@@ -0,0 +1,54 @@
+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+def load_model():
+    """
+    Load the DeepSeek-R1 model.
+    Note: we rely on flash_attn, so this should work
+    once PyTorch+CUDA and flash_attn are installed.
+    """
+    try:
+        model = AutoModelForCausalLM.from_pretrained(
+            "deepseek-ai/DeepSeek-R1",
+            trust_remote_code=True
+        )
+        tokenizer = AutoTokenizer.from_pretrained(
+            "deepseek-ai/DeepSeek-R1",
+            trust_remote_code=True
+        )
+        # Return a text-generation pipeline
+        return pipeline("text-generation", model=model, tokenizer=tokenizer)
+    except Exception as e:
+        return f"Model Loading Error: {e}"
+
+model_pipeline = load_model()
+
+def process_text(input_text):
+    """
+    Uses the loaded DeepSeek-R1 pipeline to generate text.
+    """
+    if isinstance(model_pipeline, str):
+        return model_pipeline  # load_model() returned an error string
+    try:
+        # Adjust generation parameters as desired
+        outputs = model_pipeline(input_text, max_length=200, num_return_sequences=1)
+        return outputs[0]["generated_text"]
+    except Exception as e:
+        return f"Inference Error: {e}"
+
+with gr.Blocks() as demo:
+    gr.Markdown(
+        "# DeepSeek-R1 Text Generator\n"
+        "Enter a prompt and generate text using the DeepSeek-R1 model."
+    )
+    input_box = gr.Textbox(
+        lines=5, label="Input Prompt", placeholder="Type your prompt here..."
+    )
+    generate_btn = gr.Button("Generate")
+    output_box = gr.Textbox(
+        lines=10, label="Generated Text", placeholder="Generated text appears here..."
+    )
+
+    generate_btn.click(fn=process_text, inputs=input_box, outputs=output_box)
+
+demo.launch(server_name="0.0.0.0", server_port=7860)  # bind beyond localhost so the exposed port is reachable
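DeepSeek-R1 is a very large model (hundreds of billions of parameters), and the plain `from_pretrained` call above will try to materialize it in default precision on a single device. Below is a minimal sketch of a more memory-aware loader, assuming `accelerate` is installed so that `device_map="auto"` is available; the function name `load_model_sharded` is ours, not part of the commit.

```python
# Sketch only: a memory-aware variant of load_model(),
# assuming `accelerate` is installed (pip install accelerate).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

MODEL_ID = "deepseek-ai/DeepSeek-R1"  # same checkpoint as app.py

def load_model_sharded():
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        trust_remote_code=True,
        torch_dtype=torch.bfloat16,  # half-precision weights instead of fp32
        device_map="auto",           # let accelerate place layers across GPUs/CPU
    )
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
```

Even with sharding and bfloat16, the full DeepSeek-R1 checkpoint will not fit on a single GPU; one of the smaller distilled DeepSeek-R1 variants may be a more realistic target for a single-GPU Space.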