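"""Hotel-booking assistant chatbot for a Hugging Face Space.

A LangChain LLMChain, backed by a dedicated HuggingFace Inference Endpoint,
drives a scripted booking dialogue behind a Gradio ChatInterface.
"""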
import os

import gradio as gr
import spaces
from huggingface_hub import InferenceClient
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import ChatPromptTemplate
from langchain_community.llms import HuggingFaceEndpoint
from langchain_core.prompts.chat import (
    HumanMessagePromptTemplate,
    SystemMessagePromptTemplate,
)
# Dedicated HF Inference Endpoint serving the chat model; the access token is
# read from the Space's secrets.
endpoint_url = "https://kp4xdy196cw81uf3.us-east-1.aws.endpoints.huggingface.cloud"
token = os.environ["API_TOKEN"]
# LangChain wrapper around the endpoint. Near-greedy decoding (temperature
# close to 0) keeps the assistant's answers deterministic and on-script.
llm = HuggingFaceEndpoint(
    endpoint_url=endpoint_url,
    huggingfacehub_api_token=token,
    task="text-generation",
    max_new_tokens=128,
    top_k=10,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.01,
    repetition_penalty=1.03,
)
# Alternative, lower-level path: stream tokens straight from the endpoint with
# the InferenceClient instead of going through LangChain. Unused by the Gradio
# app below; kept for reference. (The original module-level call referenced an
# undefined `prompt`, so the streaming call is wrapped in a helper here.)
client = InferenceClient(endpoint_url, token=token)
gen_kwargs = dict(
    max_new_tokens=128,
    top_k=30,
    top_p=0.95,
    temperature=0.01,
    repetition_penalty=1.02,
    stop_sequences=["\nUser:", "<|endoftext|>", "</s>", "\nHuman:"],
)

def stream_completion(prompt):
    """Yield generated text chunks for `prompt` directly from the endpoint."""
    for chunk in client.text_generation(prompt, stream=True, details=True, **gen_kwargs):
        yield chunk.token.text
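# Example (hypothetical) use of the streaming helper above:
#   for text in stream_completion("User: Hi, I need a room.\nAssistant:"):
#       print(text, end="")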
def chat_template_prompt():
    """Build the chat prompt: booking-script system message plus the user turn."""
    template = """
Do not repeat questions and do not generate answers for the user. Do not repeat yourself and do not generate dialogues.
Below is an instruction that describes a task. During the conversation you need to ask the user
the following questions to complete the hotel booking task. After each question, wait for the user's response.
1) Where would you like to stay and when?
2) How many people are staying in the room?
3) Do you prefer any amenities, such as breakfast included or a gym?
4) What is your name, email address and phone number?
If the user wants to book the room, confirm the booking; otherwise respond with "Thank you. Please let me know if there is
any other way I can assist you."
{history}
"""
    system_prompt = SystemMessagePromptTemplate.from_template(template)
    human_prompt = HumanMessagePromptTemplate.from_template("{input}")
    chat_prompt = ChatPromptTemplate.from_messages([system_prompt, human_prompt])
    return chat_prompt
def chain():
    """Wire the endpoint LLM, a sliding-window memory and the prompt into one chain."""
    chat_prompt = chat_template_prompt()
    # Keep only the last k=3 exchanges in {history} so the prompt stays short.
    # (ConversationBufferMemory would keep the full transcript instead.)
    memory = ConversationBufferWindowMemory(k=3)  # default memory_key="history"
    llm_chain = LLMChain(llm=llm, memory=memory, prompt=chat_prompt)
    memory.load_memory_variables({})  # initialize the (empty) history
    return llm_chain
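# Quick smoke test of the chain, assuming the endpoint is reachable and
# API_TOKEN is set (hypothetical, not part of the app flow):
#   print(chain().predict(input="Hello, I would like to book a hotel room."))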
@spaces.GPU
def chat_output(message, history):
    # `history` supplied by gr.ChatInterface is ignored; the chain's window
    # memory tracks the conversation instead.
    result = llm_chain.predict(input=message)
    return result
with gr.Blocks() as demo:
    llm_chain = chain()
    gr.ChatInterface(
        fn=chat_output,
        examples=[
            "Hello, I would like to book a hotel room.",
            "Hello, I want to stay in Nuremberg on the 30th of May.",
        ],
        title="Hotel Booking Assistant Chat 🤗",
        description="I am your hotel booking assistant. Feel free to start chatting with me.",
    )

demo.launch()