import gradio as gr
from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

"""
Gradio chat app that forwards each conversation to an Ollama-compatible
LLM API hosted on a Hugging Face Space, via LangChain's ChatOllama wrapper.
"""
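# Runtime dependencies (per the imports above): gradio, langchain-community,
# langchain-core.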


def _escape_braces(text: str) -> str:
    # ChatPromptTemplate treats {...} as f-string template variables, so
    # literal braces in user-supplied text must be doubled to survive
    # formatting.
    return text.replace("{", "{{").replace("}", "}}")


def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
    model_name="llama3-8b",
    api_key=None,
):
    # Point ChatOllama at the remote endpoint; the Hugging Face API key is
    # forwarded as a bearer token.
    client = ChatOllama(
        model=model_name,
        base_url="https://lintasmediadanawa-hf-llm-api.hf.space",
        headers={"Authorization": f"Bearer {api_key}"},
        temperature=temperature,
        top_p=top_p,
        num_predict=max_tokens,  # Ollama's name for the max-new-tokens limit
    )
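    # base_url points at an Ollama-style REST API; its available models can
    # be listed at https://lintasmediadanawa-hf-llm-api.hf.space/api/tags.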
    
    # Rebuild the conversation as (role, content) tuples for the prompt
    # template, escaping braces so past turns are treated as literal text.
    messages = [("system", _escape_braces(system_message))]
    for user_msg, ai_msg in history:
        if user_msg:
            messages.append(("human", _escape_braces(user_msg)))
        if ai_msg:
            messages.append(("ai", _escape_braces(ai_msg)))

    # The current message is injected through the {input} template variable,
    # so it is never parsed for placeholders itself.
    messages.append(("human", "{input}"))

    chain = ChatPromptTemplate.from_messages(messages) | client | StrOutputParser()
    return chain.invoke({"input": message})
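
# A minimal smoke test of respond() outside the Gradio UI; the model name
# and API key below are placeholder values, not working credentials:
#
#   print(respond("Hello!", [], "You are a friendly Chatbot.",
#                 max_tokens=256, temperature=0.7, top_p=0.95,
#                 model_name="llama3-8b", api_key="hf_xxx"))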

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
        gr.Textbox(value="llama3-8b", label="Available Model Name, please refer to https://lintasmediadanawa-hf-llm-api.hf.space/api/tags"),
        gr.Textbox(value="hf_xxx", label="Huggingface API key")
    ],
)


if __name__ == "__main__":
    demo.launch()