"""Gradio chat demo that streams replies from a hosted Zephyr-7B model."""

from collections.abc import Iterator

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

# For more information on `huggingface_hub` Inference API support, please
# check the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

# NOTE(review): `pipe` is not referenced by the chat UI below; it is only used
# by the commented-out example that follows. It is kept so the existing
# module-level names stay available -- confirm whether it can be dropped, since
# building it loads the model at import time.
modelpath = "distilgpt2"
pipe = pipeline(
    "text-generation",
    model=modelpath,
)

# Example of driving the local pipeline directly (disabled):
#
# messages = [
#     {"role": "system", "content": "You are a customer applying for a housing loan in India. Provide dummy details about your application and negotiate the terms."},
#     {"role": "user", "content": "Hi!Welcome to Hero Housing Finance!"},
#     {"role": "assistant", "content": "Hello, I would like to apply for a loan."},
# ]
# outputs = pipe(
#     messages,
#     max_new_tokens=256,
# )
# print(outputs[0]["generated_text"][-1])

# Default system prompt; also used as the initial value of the "System message"
# textbox so the two can never drift apart.
system_message = (
    "You are a Technical Support Assistant. Read the Context and generate only "
    "the summary of the answer to the Query based on your understanding of the "
    "pairs in the context."
)


def respond(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> Iterator[str]:
    """Stream the assistant's reply to ``message``.

    Args:
        message: The newest user message.
        history: Earlier turns as ``(user_text, assistant_text)`` pairs; empty
            entries in a pair are skipped.
        system_message: System prompt placed at the start of the conversation.
        max_tokens: Forwarded to ``client.chat_completion`` as ``max_tokens``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The reply accumulated so far, once per streamed chunk, so the UI can
        render the text incrementally.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_text, assistant_text in history:
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})

    messages.append({"role": "user", "content": message})

    response = ""

    # The loop variable is deliberately not called `message`, so the function
    # parameter is never shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # A streamed chunk may carry no text (content is None, e.g. on a
        # role-only or final chunk); concatenating None would raise TypeError.
        if token:
            response += token
        yield response


# For information on how to customize the ChatInterface, peruse the gradio
# docs: https://www.gradio.app/docs/chatinterface
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value=system_message, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()