import gradio as gr
from transformers import pipeline
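
# Assumes gradio and transformers are installed, with a PyTorch backend
# available for the text-generation pipeline; all models below run on CPU.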

def generate(
    model_name,
    system_input,
    user_initial_message,
    assistant_initial_message,
    user_input,
):
    # Load the selected model as a CPU text-generation pipeline on each call.
    pipe = pipeline("text-generation", model=model_name, device="cpu")

    # A four-turn conversation: system prompt, a fixed opening exchange, and
    # the user's actual question.
    message_template = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_initial_message},
        {"role": "assistant", "content": assistant_initial_message},
        {"role": "user", "content": user_input},
    ]

    # Render the conversation with the model's own chat template, leaving the
    # assistant turn open so the model continues from there.
    prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
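    # The rendered prompt depends on the model's chat template; for a
    # ChatML-style template it looks roughly like this (an illustrative
    # sketch, not the verbatim output of every model above):
    #
    #   <|im_start|>system
    #   {system_input}<|im_end|>
    #   <|im_start|>user
    #   {user_initial_message}<|im_end|>
    #   <|im_start|>assistant
    #   {assistant_initial_message}<|im_end|>
    #   <|im_start|>user
    #   {user_input}<|im_end|>
    #   <|im_start|>assistant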
    # Per-model generation settings: each tuned model uses contrastive search
    # (penalty_alpha + top_k) with its own repetition penalty.
    contrastive_search_settings = {
        "Felladrin/Pythia-31M-Chat-v1": {"top_k": 2, "repetition_penalty": 1.0016},
        "Felladrin/Llama-68M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.043},
        "Felladrin/Smol-Llama-101M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.105},
        "Felladrin/Llama-160M-Chat-v1": {"top_k": 4, "repetition_penalty": 1.01},
        "Felladrin/TinyMistral-248M-Chat-v4": {"top_k": 5, "repetition_penalty": 1.0},
    }
    if model_name in contrastive_search_settings:
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, **contrastive_search_settings[model_name])
    else:
        # Models without tuned settings fall back to plain sampling.
        outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.65, top_k=35, top_p=0.55, repetition_penalty=1.176)
    return outputs[0]["generated_text"]
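
# Example of exercising generate() directly, outside the UI (a quick local
# sanity check; the model is downloaded on first use):
#
#   print(generate(
#       "Felladrin/Llama-160M-Chat-v1",
#       "You are a helpful assistant.",
#       "Hey! Got a question for you!",
#       "Sure! What is it?",
#       "What is the capital of France?",
#   ))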

# Models selectable in the dropdown; the first entry is the default.
model_choices = [
    "Felladrin/Llama-160M-Chat-v1",
    "Felladrin/TinyMistral-248M-Chat-v4",
    "Felladrin/Llama-68M-Chat-v1",
    "Felladrin/Minueza-32M-UltraChat",
    "Felladrin/Smol-Llama-101M-Chat-v1",
    "Felladrin/Pythia-31M-Chat-v1",
]

g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Message", value="You are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user."),
        gr.components.Textbox(lines=2, label="User Initial Message", value="Hey! Got a question for you!"),
        gr.components.Textbox(lines=2, label="Assistant Initial Message", value="Sure! What is it?"),
        gr.components.Textbox(lines=2, label="User Message", value="Can you list some potential applications for quantum computing?"),
    ],
    outputs=[gr.Textbox(lines=24, label="Output")],
    title="A place to try out text-generation models fine-tuned by Felladrin",
    concurrency_limit=1,
)
g.launch(max_threads=2)
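
# A minimal sketch of calling the running app programmatically with
# gradio_client, assuming the default local address (adjust the URL, or pass
# a Space ID instead, as needed):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860")
#   result = client.predict(
#       "Felladrin/Llama-160M-Chat-v1",                 # Model
#       "You are a helpful assistant.",                 # System Message
#       "Hey! Got a question for you!",                 # User Initial Message
#       "Sure! What is it?",                            # Assistant Initial Message
#       "What are some uses of quantum computing?",     # User Message
#       api_name="/predict",
#   )
#   print(result)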