Amossofer commited on
Commit
421c124
·
1 Parent(s): 6c35cb3
Files changed (2) hide show
  1. app.py +50 -0
  2. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import torch

# Model to serve. This repo ships custom modeling code, so
# trust_remote_code=True is needed on BOTH the tokenizer and the model.
# NOTE(review): trust_remote_code executes code from the model repo —
# only safe because the repo is explicitly chosen here, not user input.
model_id = "PowerInfer/SmallThinker-21BA3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(
    model_id,
    trust_remote_code=True  # Required for models with custom code
)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cpu",           # Run on CPU (no GPU assumed in this Space)
    torch_dtype=torch.float32,  # float32: CPU has no native fp16/bf16 fast path
    trust_remote_code=True      # Allow the repo's custom model code to run
)

# Text-generation pipeline used by the chat() handler below.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device=-1  # -1 = CPU for the pipeline API
)
27
+
28
# Gradio handler: maps (prompt, max tokens, temperature) -> response text.
def chat(prompt, max_new_tokens=256, temperature=0.7):
    """Generate a completion for *prompt* with the module-level pipeline.

    Args:
        prompt: User text to complete.
        max_new_tokens: Upper bound on newly generated tokens.
        temperature: Sampling temperature (> 0; higher = more random).

    Returns:
        Only the newly generated text, without the prompt echoed back.
    """
    output = generator(
        prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        # Fix: by default the pipeline prepends the prompt to
        # "generated_text", so the Response box repeated the question
        # before the answer. Return only the completion.
        return_full_text=False,
    )
    return output[0]["generated_text"]
38
+
39
# Assemble the Gradio UI from named components, then serve it.
prompt_box = gr.Textbox(label="Prompt", lines=4, placeholder="Ask anything...")
tokens_slider = gr.Slider(32, 512, value=256, step=16, label="Max New Tokens")
temp_slider = gr.Slider(0.1, 1.5, value=0.7, step=0.1, label="Temperature")

demo = gr.Interface(
    fn=chat,
    inputs=[prompt_box, tokens_slider, temp_slider],
    outputs=gr.Textbox(label="Response"),
    title="💬 SmallThinker-21BA3B-Instruct",
    description="Run PowerInfer/SmallThinker-21BA3B-Instruct locally on CPU using Hugging Face + Gradio",
)
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio>=4.44.0
2
+ torch>=2.1.0
3
+ transformers==4.53.3
4
+ accelerate>=0.27.0
5
+ sentencepiece # required for some tokenizers
6
+ safetensors # faster & safer model loading
7
+ hf_xet