import gradio as gr
from text_generation import Client

hf_api_key = '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'

# FalconLM-instruct endpoints on the text_generation library
# client = Client("https://api-inference.huggingface.co/models/tiiuae/falcon-40b-instruct", headers={"Authorization": f"Bearer {hf_api_key}"}, timeout=120)
# client = Client("https://wjmh73a2pphfr6ed.us-east-1.aws.endpoints.huggingface.cloud", headers={"Authorization": f"Bearer {hf_api_key}"}, timeout=120)
client = Client("https://api-inference.huggingface.co/models/tiiuae/falcon-7b-instruct",
                headers={"Authorization": f"Bearer {hf_api_key}"}, timeout=120)

def generate(input):
    # Query the Falcon endpoint and return only the generated text
    output = client.generate(input, max_new_tokens=1024).generated_text
    return output

def respond(message, chat_history):
    # No LLM here, just respond with a random pre-made message
    '''bot_message = random.choice(["Tell me more about it",
                                    "Cool, but I'm not interested",
                                    "Hmmmm, ok then"]) '''
    bot_message = generate(message)
    chat_history.append((message, bot_message))
    # Return an empty string to clear the textbox, plus the updated chat history
    return "", chat_history

with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Prompt")
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])  # Press Enter to submit

demo.launch(height=240)  # small inline height, just to fit the notebook
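
# Optional variation (a minimal sketch, not part of the code above): instead of
# hard-coding hf_api_key, the token could be read from the environment. The
# variable name HF_API_KEY is an assumption; use whatever name matches how the
# token was exported in your shell (e.g. `export HF_API_KEY=hf_...`).
import os

hf_api_key = os.environ.get("HF_API_KEY", "")  # falls back to an empty string if unset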