ebi committed on
Commit a2c2132 · 1 Parent(s): 27c8941
Files changed (3)
  1. Dockerfile +29 -0
  2. README.md +7 -5
  3. app.py +54 -0
Dockerfile ADDED
@@ -0,0 +1,29 @@
+ FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
+
+ # 1. System packages
+ RUN apt-get update && apt-get install -y \
+     python3 python3-pip python3-dev build-essential ninja-build git \
+     && rm -rf /var/lib/apt/lists/*
+
+ # 2. Upgrade pip
+ RUN pip install --upgrade pip
+
+ # 3. Install GPU-compatible Torch from the CUDA 12.1 wheel index
+ RUN pip install torch==2.1.0 --index-url https://download.pytorch.org/whl/cu121
+
+ # 4. Install flash_attn (needs torch at build time, so skip build isolation)
+ RUN pip install flash_attn --no-build-isolation
+
+ # 5. Install the other Python libraries the app needs
+ RUN pip install transformers gradio
+
+ # 6. Copy the application code into the container
+ WORKDIR /app
+ COPY . /app
+
+ # 7. Expose port 7860 (Gradio default)
+ EXPOSE 7860
+
+ # 8. Launch the app
+ CMD ["python3", "app.py"]
+
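Once the image is built, a quick sanity check run inside the container can confirm that the CUDA-enabled Torch and the flash_attn wheel actually installed. This is only a sketch; the file name check_env.py is illustrative and not part of this commit.

# check_env.py - minimal sanity check (illustrative, not part of this commit)
import torch        # should be the cu121 build installed above
import flash_attn   # import fails if the wheel did not build

print("torch:", torch.__version__)                 # e.g. 2.1.0+cu121
print("cuda available:", torch.cuda.is_available())
print("flash_attn:", flash_attn.__version__)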
README.md CHANGED
@@ -1,10 +1,12 @@
  ---
- title: Autotrain
- emoji: 📈
- colorFrom: purple
+ title: Autotrain Deepseek
+ emoji: 💬
+ colorFrom: yellow
  colorTo: purple
- sdk: docker
+ sdk: gradio
+ sdk_version: 5.0.1
+ app_file: app.py
  pinned: false
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ An example chatbot using [Gradio](https://gradio.app), [`huggingface_hub`](https://huggingface.co/docs/huggingface_hub/v0.22.2/en/index), and the [Hugging Face Inference API](https://huggingface.co/docs/api-inference/index).
app.py ADDED
@@ -0,0 +1,54 @@
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+
+ def load_model():
+     """
+     Load the DeepSeek-R1 model.
+     Note: We rely on flash_attn, so this should now work
+     once PyTorch+CUDA and flash_attn are installed.
+     """
+     try:
+         model = AutoModelForCausalLM.from_pretrained(
+             "deepseek-ai/DeepSeek-R1",
+             trust_remote_code=True
+         )
+         tokenizer = AutoTokenizer.from_pretrained(
+             "deepseek-ai/DeepSeek-R1",
+             trust_remote_code=True
+         )
+         # Return a text-generation pipeline
+         return pipeline("text-generation", model=model, tokenizer=tokenizer)
+     except Exception as e:
+         return f"Model Loading Error: {e}"
+
+ model_pipeline = load_model()
+
+ def process_text(input_text):
+     """
+     Uses the loaded DeepSeek-R1 pipeline to generate text.
+     """
+     if isinstance(model_pipeline, str):
+         return f"Error: {model_pipeline}"  # model_pipeline holds an error string
+     try:
+         # Adjust parameters as desired
+         outputs = model_pipeline(input_text, max_length=200, num_return_sequences=1)
+         return outputs[0]["generated_text"]
+     except Exception as e:
+         return f"Inference Error: {e}"
+
+ with gr.Blocks() as demo:
+     gr.Markdown(
+         "# DeepSeek-R1 Text Generator\n"
+         "Enter a prompt and generate text using the DeepSeek-R1 model."
+     )
+     input_box = gr.Textbox(
+         lines=5, label="Input Prompt", placeholder="Type your prompt here..."
+     )
+     generate_btn = gr.Button("Generate")
+     output_box = gr.Textbox(
+         lines=10, label="Generated Text", placeholder="Generated text appears here..."
+     )
+
+     generate_btn.click(fn=process_text, inputs=input_box, outputs=output_box)
+
+ demo.launch()
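For a quick local smoke test of the same pipeline call that process_text makes, the model id can be swapped for a tiny checkpoint. This is only a sketch; sshleifer/tiny-gpt2 is an arbitrary small stand-in model and is not part of the Space.

# smoke_test.py - illustrative only; uses a tiny stand-in model, not DeepSeek-R1
from transformers import pipeline

generator = pipeline("text-generation", model="sshleifer/tiny-gpt2")
outputs = generator("Hello, world", max_length=20, num_return_sequences=1)
print(outputs[0]["generated_text"])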