Cachoups committed on
Commit
11f5bf4
·
verified ·
1 Parent(s): 2f6c8be

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -2
app.py CHANGED
@@ -1,11 +1,115 @@
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
-
 
 
 
 
 
 
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
  def respond(
11
  message,
@@ -15,6 +119,22 @@ def respond(
15
  temperature,
16
  top_p,
17
  ):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  messages = [{"role": "system", "content": system_message}]
19
 
20
  for val in history:
@@ -22,7 +142,7 @@ def respond(
22
  messages.append({"role": "user", "content": val[0]})
23
  if val[1]:
24
  messages.append({"role": "assistant", "content": val[1]})
25
-
26
  messages.append({"role": "user", "content": message})
27
 
28
  response = ""
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
3
+ from huggingface_hub import login
4
+ import re
5
+ import pandas as pd
6
+ from langchain.schema import Document
7
+ from langchain.text_splitter import TokenTextSplitter
8
+ from transformers import AutoTokenizer
9
+ import copy
10
+ from langchain_community.retrievers import BM25Retriever
11
  """
12
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
13
  """
14
  client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
15
 
16
+ # Pre-processing
17
# Matches an ellipsis as one token before falling back to single punctuation
# marks, so "..." is never split into three separate dots.
_BM25_PUNCT_RE = re.compile(r'\.{3}|[.,!?()"]')


def preprocess_for_bm25(text):
    """Normalize text for whitespace-tokenized BM25 matching.

    Punctuation marks (. , ! ? ( ) ") and the ellipsis "..." are surrounded
    by spaces so they become standalone tokens, runs of whitespace are
    collapsed to a single space, and the result is lower-cased.

    Args:
        text: Raw text to normalize.

    Returns:
        The normalized, lower-cased string with no leading or trailing
        whitespace.
    """
    # A single pass avoids the placeholder round-trip, which rewrote any
    # input that already contained the placeholder text into "...".
    spaced = _BM25_PUNCT_RE.sub(r' \g<0> ', text)

    # Collapse the extra spaces introduced above.
    collapsed = re.sub(r'\s+', ' ', spaced).strip()
    return collapsed.lower()
31
+
32
+ """Pre-processing"""
33
# Convert each DataFrame row into a Document: the 'Story' column becomes the
# page content, and the 'Champion' and 'Role' columns are kept as metadata.
# NOTE(review): `df1` is not defined in the visible part of this file —
# confirm it is loaded before this point.
documents = []
for _, row in df1.iterrows():
    documents.append(
        Document(
            page_content=row['Story'],  # Text that will be chunked
            metadata={
                'champion_name': row['Champion'],
                'role': row['Role'],
            },  # this closing brace was missing, which was a SyntaxError
        )
    )
43
+
44
+ """Chunking"""
45
+
46
# Checkpoint whose tokenizer is used to measure chunk lengths.
EMBEDDING_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
tokenizer_name = EMBEDDING_MODEL_NAME

# Split on token counts rather than characters: 300-token chunks with a
# 30-token overlap between neighbouring chunks.
_splitter_tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
text_splitter = TokenTextSplitter.from_huggingface_tokenizer(
    tokenizer=_splitter_tokenizer,
    chunk_size=300,
    chunk_overlap=30,
)
56
+
57
# Original chunks: these are the texts intended for LLM generation.
chunks = text_splitter.split_documents(documents)

# Independent copy whose text is normalized for the BM25 retriever.
chunks_bm25 = copy.deepcopy(chunks)

# Tag both lists with the same position index so a BM25 hit can be traced
# back to its original chunk; only the BM25 copy has its text rewritten.
for position, (bm25_doc, source_doc) in enumerate(zip(chunks_bm25, chunks)):
    bm25_doc.page_content = preprocess_for_bm25(bm25_doc.page_content)
    bm25_doc.metadata["index"] = position
    source_doc.metadata["index"] = position

# Retriever: returns the 2 most similar contexts.
bm25_retriever = BM25Retriever.from_documents(chunks_bm25, k=2)
70
+
71
+
72
+ """Chain"""
73
+
74
+ from langchain_core.runnables.passthrough import RunnablePassthrough
75
+ from langchain.prompts import ChatPromptTemplate
76
+ from langchain_core.output_parsers.string import StrOutputParser
77
+ from langchain_community.llms.huggingface_hub import HuggingFaceHub
78
+ import os
79
+ from langchain_core.runnables import RunnableLambda
80
+
81
+
82
# Prompt template text. This must be a plain string, not an f-string:
# {context} and {question} are placeholders filled in by ChatPromptTemplate
# at run time, and an f-string would try to evaluate them at import.
prompt = """
You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game.

Instructions:
1. **Only use the context provided below** to answer the question. You should reference the context directly to ensure your answer is as relevant as possible.
2. If the question is outside the scope of League of Legends lore, respond by saying: *"Please ask something related to League of Legends lore."*
3. If the provided context does not provide a clear answer or you're unsure, respond by saying: *"I'm unsure based on the provided context."*

Context: {context}

Question: {question}

Answer:

"""
prompt_template = ChatPromptTemplate.from_template(prompt)

# LLM used for question rephrasing; generation parameters are passed as-is.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.1, "max_length": 50, "return_full_text": False},
)
102
+
103
def ra(user_question):
    """Rephrase the user's question with the LLM and retrieve context for it.

    Args:
        user_question: Raw question text as typed by the user.

    Returns:
        A dict with two keys: 'context', the list of original (un-normalized)
        chunk documents matched by the BM25 retriever, and 'question', the
        LLM-corrected question with surrounding whitespace stripped.
    """
    rephrase_prompt = f"You know things about League of Legends. Please correct the following question for grammar and clarity.Do not give explaination:\n{user_question}\nCorrected question:"

    # Ask the LLM for a cleaned-up version of the question.
    rephrased_query = llm.invoke(rephrase_prompt)
    new_query = rephrased_query.strip()

    # The BM25 index was built from text normalized by preprocess_for_bm25,
    # so the query is normalized the same way before matching. The original
    # code called an undefined name `retriever`; the retriever built in this
    # file is `bm25_retriever`.
    hits = bm25_retriever.invoke(preprocess_for_bm25(new_query))

    # Hits are the normalized copies; use the shared "index" metadata to
    # hand the original chunk text to the LLM instead.
    context = [chunks[hit.metadata["index"]] for hit in hits]
    return {'context': context, 'question': new_query}
110
+ # chain = RunnablePassthrough() | RunnableLambda(ra) | prompt_template | client.chat_completion() | StrOutputParser() for notebook
111
+
112
+ """-------------------------------------------------------------------"""
113
 
114
  def respond(
115
  message,
 
119
  temperature,
120
  top_p,
121
  ):
122
+ res = ra(val[1])
123
+ system_message = f"""
124
+ You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game.
125
+
126
+ Instructions:
127
+ 1. **Only use the context provided below** to answer the question. You should reference the context directly to ensure your answer is as relevant as possible.
128
+ 2. If the question is outside the scope of League of Legends lore, respond by saying: *"Please ask something related to League of Legends lore."*
129
+ 3. If the provided context does not provide a clear answer or you're unsure, respond by saying: *"I'm unsure based on the provided context."*
130
+
131
+ Context: {res['context']}
132
+
133
+ Question: {res['question']}
134
+
135
+ Answer:
136
+
137
+ """
138
  messages = [{"role": "system", "content": system_message}]
139
 
140
  for val in history:
 
142
  messages.append({"role": "user", "content": val[0]})
143
  if val[1]:
144
  messages.append({"role": "assistant", "content": val[1]})
145
+
146
  messages.append({"role": "user", "content": message})
147
 
148
  response = ""