Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -22,7 +22,7 @@ examples=[
|
|
22 |
|
23 |
|
24 |
# Stream text
|
25 |
-
def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=4096):
|
26 |
|
27 |
client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
|
28 |
return client.predict(
|
@@ -35,13 +35,51 @@ def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=
|
|
35 |
api_name="/chat_1"
|
36 |
)
|
37 |
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
|
42 |
# Gradio Demo
|
43 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
44 |
gr.DuplicateButton()
|
45 |
-
gr.ChatInterface(predict, title=title, description=description, css=css, examples=examples)
|
46 |
|
47 |
demo.queue().launch(debug=True)
|
|
|
22 |
|
23 |
|
24 |
# Stream text
|
25 |
+
def predict(message, chatbot, system_prompt="", temperature=0.9, max_new_tokens=4096, top_p=0.6, repetition_penalty=1.0,):
|
26 |
|
27 |
client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
|
28 |
return client.predict(
|
|
|
35 |
api_name="/chat_1"
|
36 |
)
|
37 |
|
38 |
+
additional_inputs=[
|
39 |
+
gr.Textbox("", label="Optional system prompt"),
|
40 |
+
gr.Slider(
|
41 |
+
label="Temperature",
|
42 |
+
value=0.9,
|
43 |
+
minimum=0.0,
|
44 |
+
maximum=1.0,
|
45 |
+
step=0.05,
|
46 |
+
interactive=True,
|
47 |
+
info="Higher values produce more diverse outputs",
|
48 |
+
),
|
49 |
+
gr.Slider(
|
50 |
+
label="Max new tokens",
|
51 |
+
value=256,
|
52 |
+
minimum=0,
|
53 |
+
maximum=4096,
|
54 |
+
step=64,
|
55 |
+
interactive=True,
|
56 |
+
info="The maximum numbers of new tokens",
|
57 |
+
),
|
58 |
+
gr.Slider(
|
59 |
+
label="Top-p (nucleus sampling)",
|
60 |
+
value=0.6,
|
61 |
+
minimum=0.0,
|
62 |
+
maximum=1,
|
63 |
+
step=0.05,
|
64 |
+
interactive=True,
|
65 |
+
info="Higher values sample more low-probability tokens",
|
66 |
+
),
|
67 |
+
gr.Slider(
|
68 |
+
label="Repetition penalty",
|
69 |
+
value=1.2,
|
70 |
+
minimum=1.0,
|
71 |
+
maximum=2.0,
|
72 |
+
step=0.05,
|
73 |
+
interactive=True,
|
74 |
+
info="Penalize repeated tokens",
|
75 |
+
)
|
76 |
+
]
|
77 |
|
78 |
|
79 |
|
80 |
# Gradio Demo
|
81 |
with gr.Blocks(theme=gr.themes.Base()) as demo:
|
82 |
gr.DuplicateButton()
|
83 |
+
gr.ChatInterface(predict, title=title,additional_inputs=additional_inputs, description=description, css=css, examples=examples)
|
84 |
|
85 |
demo.queue().launch(debug=True)
|