import gradio as gr
from transformers import pipeline

def generate(
    model_name,
    system_input,
    user_initial_message,
    assistant_initial_message,
    user_input,
):
    # Load a CPU text-generation pipeline for the selected model.
    pipe = pipeline("text-generation", model=model_name, device="cpu")
    
    # Assemble the chat history that the tokenizer's chat template will format.
    message_template = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_initial_message},
        {"role": "assistant", "content": assistant_initial_message},
        {"role": "user", "content": user_input},
    ]
    
    # Render the messages into a single prompt string, appending the generation prompt.
    prompt = pipe.tokenizer.apply_chat_template(message_template, tokenize=False, add_generation_prompt=True)
    
    # Each fine-tuned model gets its own contrastive-search settings (penalty_alpha + top_k);
    # any other choice (e.g. Minueza-32M-UltraChat) falls through to the sampling branch below.
    if model_name == "Felladrin/Pythia-31M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=2, repetition_penalty=1.0016)
    elif model_name == "Felladrin/Llama-68M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=4, repetition_penalty=1.043)
    elif model_name == "Felladrin/Smol-Llama-101M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=4, repetition_penalty=1.105)
    elif model_name == "Felladrin/Llama-160M-Chat-v1":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=4, repetition_penalty=1.01)
    elif model_name == "Felladrin/TinyMistral-248M-Chat-v4":
        outputs = pipe(prompt, max_new_tokens=250, use_cache=True, penalty_alpha=0.5, top_k=5, repetition_penalty=1.0)
    else:
        outputs = pipe(prompt, max_new_tokens=250, do_sample=True, temperature=0.65, top_k=35, top_p=0.55, repetition_penalty=1.176)
        
    # The pipeline's generated_text includes the formatted prompt followed by the completion.
    return outputs[0]["generated_text"]

# Models selectable from the dropdown.
model_choices = [
    "Felladrin/Llama-160M-Chat-v1",
    "Felladrin/TinyMistral-248M-Chat-v4",
    "Felladrin/Llama-68M-Chat-v1",
    "Felladrin/Minueza-32M-UltraChat",
    "Felladrin/Smol-Llama-101M-Chat-v1",
    "Felladrin/Pythia-31M-Chat-v1",
]

# Gradio UI: model selector, system/seed messages, user message, and the generated output.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Dropdown(choices=model_choices, label="Model", value=model_choices[0], interactive=True),
        gr.components.Textbox(lines=2, label="System Message", value="You are a highly knowledgeable and friendly assistant. Your goal is to understand and respond to user inquiries with clarity. Your interactions are always respectful, helpful, and focused on delivering the most accurate information to the user."),
        gr.components.Textbox(lines=2, label="User Initial Message", value="Hey! Got a question for you!"),
        gr.components.Textbox(lines=2, label="Assistant Initial Message", value="Sure! What's it?"),
        gr.components.Textbox(lines=2, label="User Message", value="Can you list some potential applications for quantum computing?"),
    ],
    outputs=[gr.Textbox(lines=24, label="Output")],
    title="A place to try out text-generation models fine-tuned by Felladrin",
    concurrency_limit=1
)

g.launch(max_threads=2)