zidsi committed
Commit 5ab747e · 1 Parent(s): 226c90f

add params

Files changed (1)
  1. app.py +16 -17
app.py CHANGED
@@ -7,20 +7,20 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer
 import os
 HF_TOKEN = os.getenv('HF_TOKEN')
 
-checkpoint = "zidsi/SLlamica_PT4SFT_v2"
+checkpoint = "zidsi/SLlamica_PT4SFT_v1"
 device = "cuda" # "cuda" or "cpu"
 tokenizer = AutoTokenizer.from_pretrained(checkpoint,token=HF_TOKEN)
 model = AutoModelForCausalLM.from_pretrained(checkpoint,token=HF_TOKEN)
 model.to(device)
 
 @spaces.GPU
-def predict(message, history):
+def predict(message, history,max_new_tokens,temperature,top_p):
     history.append({"role": "user", "content": message})
     input_text = tokenizer.apply_chat_template(history, tokenize=False)
     inputs = tokenizer.encode(input_text, return_tensors="pt").to(device)
     # Use TextStreamer for streaming response
-    streamer = TextStreamer(tokenizer)
-    outputs = model.generate(inputs, max_new_tokens=512, temperature=0.2, top_p=0.9, do_sample=True, streamer=streamer)
+    # streamer = TextStreamer(tokenizer)
+    outputs = model.generate(inputs, max_new_tokens=max_new_tokens, temperature=temperature, top_p=top_p, do_sample=True)
     # Despite returning the usual output, the streamer will also print the generated text to stdout.
 
     decoded = tokenizer.decode(outputs[0])
@@ -32,20 +32,19 @@ For information on how to customize the ChatInterface, peruse the gradio docs: h
 """
 demo = gr.ChatInterface(
     predict, type="messages",
-
+    additional_inputs=[
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(
+            minimum=0.1,
+            maximum=1.0,
+            value=0.95,
+            step=0.05,
+            label="Top-p (nucleus sampling)",
+        ),
+    ],
 )
-# additional_inputs=[
-#     gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-#     gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-#     gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#     gr.Slider(
-#         minimum=0.1,
-#         maximum=1.0,
-#         value=0.95,
-#         step=0.05,
-#         label="Top-p (nucleus sampling)",
-#     ),
-# ],
+
 
 if __name__ == "__main__":
     demo.launch()
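
Since the commit message only says "add params", a short sketch may help show what the change wires up: Gradio's ChatInterface passes the current values of additional_inputs to the chat function as extra positional arguments after (message, history), which is why predict now takes max_new_tokens, temperature and top_p. The snippet below is a minimal, self-contained illustration under that assumption; its predict body is a placeholder that echoes the parameters instead of calling model.generate on the SLlamica checkpoint.

import gradio as gr

# Hypothetical stand-in for the Space's predict(): same signature as after this commit,
# but it only echoes the sampling parameters instead of running the model.
def predict(message, history, max_new_tokens, temperature, top_p):
    return f"max_new_tokens={max_new_tokens}, temperature={temperature}, top_p={top_p}"

demo = gr.ChatInterface(
    predict,
    type="messages",
    additional_inputs=[
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)

if __name__ == "__main__":
    demo.launch()

One side effect visible in the diff: the previously hard-coded generation settings (max_new_tokens=512, temperature=0.2, top_p=0.9) are replaced by the slider defaults (512, 0.7, 0.95), and with TextStreamer commented out the reply is returned only after generation completes rather than being streamed to stdout.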