Spaces:

suwesh
/

SFAI_SalesCoach_PR

Sleeping

File size: 2,644 Bytes

2e0f8aa
06daacc
2e0f8aa
 
 
 
 
06daacc
0783741
2e0f8aa
 
 
 
 
 
bb316b0
 
 
 
 
2e0f8aa
 
 
 
 
 
bb316b0
 
9f85689
 
 
 
 
 
 
 
 
0783741
06daacc
 
 
 
 
a27ae3d
06daacc
9f85689
06daacc
253665d
06daacc
 
 
 
4208c01
 
06daacc
 
 
 
 
9f85689
 
2e0f8aa
 
 
9f85689
 
2e0f8aa
bb316b0
2e0f8aa
 
 
 
 
 
 
 
 
 
 
 
 
 
06daacc

import gradio as gr
from huggingface_hub import InferenceClient
from transformers import pipeline

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Remote chat model reached through the Hugging Face Inference API;
# `respond` streams its completions from this client.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")

# Checkpoint name handed to the local text-generation pipeline below.
modelpath = "distilgpt2"

# NOTE(review): nothing in the visible part of this file calls `pipe`;
# confirm it is still needed before keeping the model load at import time.
pipe = pipeline(task="text-generation", model=modelpath)

# Disabled example: driving `pipe` directly with a chat-style prompt.
# messages = [
#     {"role": "system", "content": "You are a customer applying for a housing loan in India. Provide dummy details about your application and negotiate the terms."},
#     {"role": "user", "content": "Hi! Welcome to Hero Housing Finance!"},
#     {"role": "assistant", "content": "Hello, I would like to apply for a loan."},
# ]
# outputs = pipe(
#     messages,
#     max_new_tokens=256,
# )
# print(outputs[0]["generated_text"][-1])

# Default system prompt text for the assistant.
system_message = "You are a Technical Support Assistant. Read the Context and generate only the summary of the answer to the Query based on your understanding of the <Question> <Answer> pairs in the context."

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream the assistant's reply to ``message`` from the inference client.

    Builds a chat transcript (system prompt, prior turns, new user message),
    requests a streamed completion from the module-level ``client`` and
    yields the reply text accumulated so far after each content chunk.

    Args:
        message: The new user message.
        history: Prior turns as ``(user_text, assistant_text)`` pairs; empty
            entries on either side are skipped.
        system_message: Text sent as the system prompt.
        max_tokens: Forwarded as ``max_tokens`` to the completion call.
        temperature: Forwarded as ``temperature`` to the completion call.
        top_p: Forwarded as ``top_p`` to the completion call.

    Yields:
        The cumulative response string after each chunk that carries text.
    """
    messages = [{"role": "system", "content": system_message}]

    for user_turn, assistant_turn in history:
        if user_turn:
            messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    response = ""

    # `chunk` (not `message`) so the loop does not clobber the parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # Some stream events carry no choices at all; nothing to append.
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content
        # Role-only or final chunks have no content; concatenating None
        # would raise TypeError and abort the stream.
        if token is None:
            continue

        response += token
        yield response


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
# Chat UI driven by `respond`. The additional inputs are declared in the same
# order as respond's extra parameters: system_message, max_tokens,
# temperature, top_p.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Reuse the module-level default so the prompt text is defined once
        # and cannot drift from the constant.
        gr.Textbox(value=system_message, label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


# Launch the chat interface only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()