EITD committed on
Commit
2a078b6
·
1 Parent(s): ac446a2

textstreamer

Browse files
Files changed (1) hide show
  1. app.py +37 -32
app.py CHANGED
@@ -1,6 +1,6 @@
1
  from peft import AutoPeftModelForCausalLM
2
- from transformers import AutoTokenizer
3
- import gradio as gr
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
@@ -12,19 +12,19 @@ model = AutoPeftModelForCausalLM.from_pretrained(
12
  )
13
  tokenizer = AutoTokenizer.from_pretrained("EITD/lora_model_1")
14
 
15
- # messages = [{"role": "user", "content": "Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,"},]
16
 
17
- # inputs = tokenizer.apply_chat_template(
18
- # messages,
19
- # tokenize = True,
20
- # add_generation_prompt = True, # Must add for generation
21
- # return_tensors = "pt",
22
- # )
23
 
24
- # outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True,
25
- # temperature = 1.5, min_p = 0.1)
26
 
27
- # print(tokenizer.batch_decode(outputs))
28
 
29
  def respond(
30
  message,
@@ -65,30 +65,35 @@ def respond(
65
  return_tensors = "pt",
66
  )
67
 
68
- outputs = model.generate(input_ids = inputs, max_new_tokens = max_tokens, use_cache = True,
69
- temperature = temperature, min_p = top_p)
70
- return tokenizer.batch_decode(outputs)
 
 
 
 
 
71
 
72
 
73
  """
74
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
75
  """
76
- demo = gr.ChatInterface(
77
- respond,
78
- additional_inputs=[
79
- gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
80
- gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
81
- gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
82
- gr.Slider(
83
- minimum=0.1,
84
- maximum=1.0,
85
- value=0.95,
86
- step=0.05,
87
- label="Top-p (nucleus sampling)",
88
- ),
89
- ],
90
- )
91
 
92
 
93
- if __name__ == "__main__":
94
- demo.launch()
 
1
  from peft import AutoPeftModelForCausalLM
2
+ from transformers import AutoTokenizer, TextStreamer
3
+ # import gradio as gr
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
 
12
  )
13
  tokenizer = AutoTokenizer.from_pretrained("EITD/lora_model_1")
14
 
15
+ messages = [{"role": "user", "content": "Continue the Fibonacci sequence: 1, 1, 2, 3, 5, 8,"},]
16
 
17
+ inputs = tokenizer.apply_chat_template(
18
+ messages,
19
+ tokenize = True,
20
+ add_generation_prompt = True, # Must add for generation
21
+ return_tensors = "pt",
22
+ )
23
 
24
+ outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True,
25
+ temperature = 1.5, min_p = 0.1)
26
 
27
+ print(tokenizer.batch_decode(outputs))
28
 
29
  def respond(
30
  message,
 
65
  return_tensors = "pt",
66
  )
67
 
68
+ # outputs = model.generate(input_ids = inputs, max_new_tokens = max_tokens, use_cache = True,
69
+ # temperature = temperature, min_p = top_p)
70
+
71
+ text_streamer = TextStreamer(tokenizer, skip_prompt = True)
72
+ model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = max_tokens,
73
+ use_cache = True, temperature = temperature, min_p = top_p)
74
+
75
+ # return tokenizer.batch_decode(outputs)
76
 
77
 
78
  """
79
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
80
  """
81
+ # demo = gr.ChatInterface(
82
+ # respond,
83
+ # additional_inputs=[
84
+ # gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
85
+ # gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
86
+ # gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
87
+ # gr.Slider(
88
+ # minimum=0.1,
89
+ # maximum=1.0,
90
+ # value=0.95,
91
+ # step=0.05,
92
+ # label="Top-p (nucleus sampling)",
93
+ # ),
94
+ # ],
95
+ # )
96
 
97
 
98
+ # if __name__ == "__main__":
99
+ # demo.launch()