Spaces:
Runtime error
Runtime error
File size: 4,583 Bytes
2c778aa a797071 2c778aa a797071 2c778aa a797071 2c778aa a797071 2c778aa ad9bba5 a797071 2c778aa a797071 2c778aa 3dd928b 2c778aa a797071 2c778aa a797071 2c778aa a797071 2c778aa a797071 2c778aa a797071 2c778aa |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
import json
import os
import shutil
import requests
import gradio as gr
from huggingface_hub import Repository, InferenceClient
# Hugging Face API token; read from the environment so it is never committed.
HF_TOKEN = os.environ.get("HF_TOKEN", None)
# Serverless Inference API endpoint for the Falcon-180B chat model.
API_URL = "https://api-inference.huggingface.co/models/tiiuae/falcon-180B-chat"
BOT_NAME = "Falcon"  # NOTE(review): format_prompt() hard-codes "Falcon" instead of using this.
# Strings that mark the end of a model turn; generation stops and trailing
# matches are trimmed from the streamed output in generate().
STOP_SEQUENCES = ["\nUser:", "<|endoftext|>", " User:", "###"]
# Example openers shown under the chat box (list-of-lists, as gr.ChatInterface expects).
EXAMPLES = [
    ["Hey Danny D? What's happening?"],
    ["Any big news in showbiz?"],
    ["Pats or Geno's?"],
    ["The age old question: Limes - Thick or thin?"],
    ["Hey DeVito! Which exit?"]
    ]
# Shared client for the hosted inference endpoint; authenticated via HF_TOKEN.
client = InferenceClient(
    API_URL,
    headers={"Authorization": f"Bearer {HF_TOKEN}"},
)
def format_prompt(message, history, system_prompt):
    """Build the flat text transcript sent to the model.

    Layout: an optional ``System:`` line, one ``User:``/``Falcon:`` pair per
    history turn, then the new message followed by a bare ``Falcon:`` cue for
    the model to complete.

    Args:
        message: The user's newest message.
        history: Iterable of (user_message, bot_response) pairs.
        system_prompt: Persona/instruction text; skipped when falsy.

    Returns:
        The assembled prompt string.
    """
    segments = []
    if system_prompt:
        segments.append(f"System: {system_prompt}\n")
    for past_user, past_bot in history:
        segments.append(f"User: {past_user}\n")
        # The stored response does not include a "Falcon: " prefix, so add one.
        segments.append(f"Falcon: {past_bot}\n")
    # End with "Falcon:" (no trailing space/newline) so the model continues the turn.
    segments.append(f"User: {message}\nFalcon:")
    return "".join(segments)
seed = 42  # Mutable global sampling seed; generate() increments it per call so replies vary.
def generate(
    prompt, history, system_prompt="You are Dany DeVito a famous American actor who is now trapped in my laptop as an AI and is providing personalized assistance services. Remember to always respond in your trademark voice, wit and dark humor.", temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0,
):
    """Stream a chat reply from the hosted Falcon-180B-chat endpoint.

    Generator used by gr.ChatInterface: each ``yield`` replaces the bot's
    in-progress message with the text accumulated so far.

    Args:
        prompt: Newest user message.
        history: Prior (user, bot) turn pairs.
        system_prompt: Persona instructions prepended to the transcript.
        temperature: Sampling temperature; clamped to a 1e-2 floor.
        max_new_tokens: Generation length cap.
        top_p: Nucleus-sampling cutoff.
        repetition_penalty: Penalty for repeated tokens.

    Yields:
        The cumulative response text, with trailing stop sequences trimmed.
    """
    global seed  # module-level counter; bumped each call so retries differ
    # A temperature of exactly 0 is invalid for sampling — enforce a small floor.
    temperature = max(float(temperature), 1e-2)
    top_p = float(top_p)
    sampling_options = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        stop_sequences=STOP_SEQUENCES,
        do_sample=True,
        seed=seed,
    )
    seed += 1
    transcript = format_prompt(prompt, history, system_prompt)
    token_stream = client.text_generation(
        transcript, **sampling_options, stream=True, details=True, return_full_text=False
    )
    accumulated = ""
    for chunk in token_stream:
        accumulated += chunk.token.text
        # The API may emit a stop sequence before halting; strip it from the tail.
        for marker in STOP_SEQUENCES:
            if accumulated.endswith(marker):
                accumulated = accumulated[: -len(marker)].rstrip()
        yield accumulated
    # Final yield so the UI settles on the completed (trimmed) text.
    yield accumulated
    return accumulated
# Extra controls for gr.ChatInterface (system prompt + sampling knobs).
# NOTE(review): currently unused — the additional_inputs kwarg is commented out
# where ChatInterface is constructed, so these widgets are never shown and the
# generate() defaults apply instead.
additional_inputs=[
    gr.Textbox("You are Dany DeVito a famous American actor who is now trapped in my laptop as an AI and is providing personalized assistance services. Remember to always respond in your trademark voice, wit and dark humor.", label="Optional system prompt"),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=512,
        minimum=0,
        maximum=8192,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]
# Page layout: banner image on the left, intro markdown on the right,
# chat interface below, then launch with queuing enabled.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Banner; the image file must ship alongside this script.
            gr.Image("Danny-DiVito-as-the-Trashman.jpg", elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.Markdown(
                """# Danny-DeVito-180B Demo
**Chat with Danny DeVito!**
✨ This demo is powered by [Falcon-180B](https://huggingface.co/tiiuae/falcon-180B) in what can probably be classified as a grave misuse of this wonderful technology.
👀 **Learn more about Falcon LLM and why ChatGPT can suck it:** [falconllm.tii.ae](https://falconllm.tii.ae/)
➡️️ **Intended Use**: Do whatever you want, I'm not your Dad. Just don't be a jerk!
⚠️ **Limitations**: Danny only has about a 1000 tokens to his name so keep the conversations short and sweet. Just like him!
"""
            )
    # generate() is a generator, so replies stream into the chat window.
    gr.ChatInterface(
        generate,
        examples=EXAMPLES
        ## additional_inputs=additional_inputs,
    )
# NOTE(review): queue(concurrency_count=...) is Gradio 3.x API; removed in
# Gradio 4 (use launch/queue concurrency settings there) — confirm pinned version.
demo.queue(concurrency_count=100, api_open=False).launch(show_api=False)
|