falcon-180b-demo

Runtime error

futranbg committed on Nov 8, 2023

Commit

aac3374

1 Parent(s): 0caf6b4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,8 +3,15 @@ import gradio as gr
 from huggingface_hub import InferenceClient
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
-API_URL = "meta-llama/Llama-2-70b-chat-hf"
-API_URL_2 = "codellama/CodeLlama-34b-Instruct-hf"
 BOT_NAME = "Assistant"
 STOP_SEQUENCES = ["\nUser:", " User:", "###", "</s>"]
@@ -17,16 +24,6 @@ EXAMPLES = [
     ["Can you write a short tweet about the release of our latest AI model, LLAMA LLM?"]
     ]
-client = InferenceClient(
-    API_URL,
-    token=HF_TOKEN,
-)
-client2 = InferenceClient(
-    API_URL_2,
-    token=HF_TOKEN,
-)
 def format_prompt(message, history, system_prompt):
   prompt = ""
   if system_prompt:
@@ -59,6 +56,13 @@ def generate(
     seed = seed + 1
     formatted_prompt = format_prompt(prompt, history, system_prompt)
     try:
         stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""

 from huggingface_hub import InferenceClient
 HF_TOKEN = os.environ.get("HF_TOKEN", None)
+# Model ids that are checked against the deployed-model list when an
+# inference client is chosen. Every entry needs its own trailing comma:
+# adjacent string literals are silently concatenated by Python, which would
+# merge two ids into a single name that matches no deployed model.
+model2api = [
+             "tiiuae/falcon-180B-chat",
+             "meta-llama/Llama-2-70b-chat-hf",
+             "codellama/CodeLlama-34b-Instruct-hf",
+             "victor/CodeLlama-34b-Instruct-hf",
+             "timdettmers/guanaco-33b-merged",
+]
 BOT_NAME = "Assistant"
 STOP_SEQUENCES = ["\nUser:", " User:", "###", "</s>"]
     ["Can you write a short tweet about the release of our latest AI model, LLAMA LLM?"]
     ]
 def format_prompt(message, history, system_prompt):
   prompt = ""
   if system_prompt:
     seed = seed + 1
     formatted_prompt = format_prompt(prompt, history, system_prompt)
+    client = InferenceClient()
+    clientList = (client.list_deployed_models('text-generation-inference'))['text-generation']
+    for model in model2api:
+        if model in clientList:
+            client = InferenceClient(model, token=HF_TOKEN)
+            print(f"Choosen model: {model}")
     try:
         stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
         output = ""