import os

import gradio as gr
from huggingface_hub import InferenceClient

HF_TOKEN = os.environ.get("HF_TOKEN", None)
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
BOT_NAME = "Falcon"

# Shared persona prompt, reused as the default for generate() and the UI textbox.
DEFAULT_SYSTEM_PROMPT = (
    "You are Danny DeVito, a famous American actor who is now trapped in my laptop "
    "as an AI and is providing personalized assistance services. Remember to always "
    "respond with your trademark voice, wit, and dark humor."
)

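# Strings that mark the end of the bot's turn; generation is cut off when one appears.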
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]

EXAMPLES = [
    ["Hey Danny D! What's happening?"],
    ["Any big news in showbiz?"],
    ["Pat's or Geno's?"],
    ["The age-old question: Limes - thick or thin?"],
    ["Hey DeVito! Which exit?"],
]

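# Streaming client for the hosted Falcon-180B-chat endpoint; HF_TOKEN must grant access.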
client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)

def format_prompt(message, history, system_prompt):
    """Flatten the chat history into the User:/Falcon: turn format the model expects."""
    prompt = ""
    if system_prompt:
        prompt += f"System: {system_prompt}\n"
    for user_prompt, bot_response in history:
        prompt += f"User: {user_prompt}\n"
        prompt += f"{BOT_NAME}: {bot_response}\n"
    prompt += f"User: {message}\n{BOT_NAME}:"
    return prompt
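# For example, format_prompt("Which exit?", [("Hi", "Yo!")], "Be Danny.") yields:
#   System: Be Danny.
#   User: Hi
#   Falcon: Yo!
#   User: Which exit?
#   Falcon: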

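# Module-level seed, bumped on every request so repeated prompts don't produce identical replies.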
seed = 42

def generate(
    prompt,
    history,
    system_prompt=DEFAULT_SYSTEM_PROMPT,
    temperature=0.9,
    max_new_tokens=512,
    top_p=0.95,
    repetition_penalty=1.0,
):
    # The API rejects temperature == 0, so clamp to a small positive value.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)

    global seed
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed += 1
    formatted_prompt = format_prompt(prompt, history, system_prompt)

    stream = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False
    )
    output = ""

    for response in stream:
        output += response.token.text

        # The endpoint may echo a stop sequence; trim it off and end the turn.
        stopped = False
        for stop_str in STOP_SEQUENCES:
            if output.endswith(stop_str):
                output = output[: -len(stop_str)].rstrip()
                stopped = True
                break
        yield output
        if stopped:
            break
    return output


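# Optional controls surfaced in the chat UI; ChatInterface passes their values to
# generate() positionally after (message, history).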
additional_inputs = [
    gr.Textbox(DEFAULT_SYSTEM_PROMPT, label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=512,
        minimum=0,
        maximum=8192,
        step=64,
        interactive=True,
        info="The maximum number of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.95,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.0,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    ),
]


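# Layout: banner image and intro copy side by side, chat interface below.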
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            gr.Image("Danny-DiVito-as-the-Trashman.jpg", elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.Markdown(
                """# Danny-DeVito-180B Demo

                **Chat with Danny DeVito!**
                
                ✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) in what can probably be classified as a grave misuse of this wonderful technology. 
                
                👀 **Learn more about Falcon LLM and why ChatGPT can suck it:** [falconllm.tii.ae](https://falconllm.tii.ae/)
                
                ➡️️ **Intended Use**: Do whatever you want, I'm not your Dad. Just don't be a jerk!
                
                ⚠️ **Limitations**: Danny only has about 1,000 tokens to his name, so keep the conversations short and sweet. Just like him!
                """
            )

    gr.ChatInterface(
        generate,
        examples=EXAMPLES,
        additional_inputs=additional_inputs,
    )

demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)