kirp committed on
Commit
ba444f5
·
1 Parent(s): 01f5916
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -17,7 +17,7 @@ snapshot_download(repo_id=repo_name, local_dir=".", allow_patterns=model_name)
17
 
18
  model = Llama(
19
  model_path=model_name,
20
- n_ctx=2000,
21
  n_parts=1,
22
  )
23
 
@@ -29,7 +29,7 @@ def generate(
29
  temperature=0.1,
30
  top_p=0.75,
31
  top_k=40,
32
- max_new_tokens=128,
33
  ):
34
  prompt = template.format(input)
35
  output = model(prompt,
@@ -52,7 +52,7 @@ g = gr.Interface(
52
  gr.components.Slider(minimum=0, maximum=1, value=0.8, label="Top p"),
53
  gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
54
  gr.components.Slider(
55
- minimum=1, maximum=1024, step=1, value=256, label="Max tokens"
56
  ),
57
  ],
58
  outputs=[
@@ -64,5 +64,5 @@ g = gr.Interface(
64
  title="tinyllama-1.1b-chat gguf",
65
  description=''
66
  )
67
- g.queue(concurrency_count=1)
68
  g.launch()
 
17
 
18
  model = Llama(
19
  model_path=model_name,
20
+ n_ctx=2048,
21
  n_parts=1,
22
  )
23
 
 
29
  temperature=0.1,
30
  top_p=0.75,
31
  top_k=40,
32
+ max_new_tokens=512,
33
  ):
34
  prompt = template.format(input)
35
  output = model(prompt,
 
52
  gr.components.Slider(minimum=0, maximum=1, value=0.8, label="Top p"),
53
  gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
54
  gr.components.Slider(
55
+ minimum=1, maximum=2048, step=1, value=512, label="Max tokens"
56
  ),
57
  ],
58
  outputs=[
 
64
  title="tinyllama-1.1b-chat gguf",
65
  description=''
66
  )
67
+ g.queue(concurrency_count=2)
68
  g.launch()