Spaces:

Cachoups
/

LoL_Lore

Running

App Files Files Community

Cachoups committed 22 days ago

Commit

11f5bf4

verified ·

1 Parent(s): 2f6c8be

Update app.py

Browse files

Files changed (1) hide show

app.py +122 -2

app.py CHANGED Viewed

@@ -1,11 +1,115 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
 def respond(
     message,
@@ -15,6 +119,22 @@ def respond(
     temperature,
     top_p,
 ):
     messages = [{"role": "system", "content": system_message}]
     for val in history:
@@ -22,7 +142,7 @@ def respond(
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""

 import gradio as gr
 from huggingface_hub import InferenceClient
+from huggingface_hub import login
+import re
+import pandas as pd
+from langchain.schema import Document
+from langchain.text_splitter import TokenTextSplitter
+from transformers import AutoTokenizer
+import copy
+from langchain_community.retrievers import BM25Retriever
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
 client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
+# Pre-processing
def preprocess_for_bm25(text):
    """Normalise text into lowercase, space-separated tokens for BM25 matching.

    Each of the punctuation marks ``. , ! ? ( ) "`` becomes a standalone
    token, a literal ``...`` is kept together as one token, runs of whitespace
    collapse to a single space, and the result is lowercased.

    Args:
        text: The raw string to normalise.

    Returns:
        The normalised string, with no leading or trailing whitespace.
    """
    # Trying the three-dot alternative first keeps an ellipsis in one piece
    # without routing it through a placeholder word, so input that happens to
    # contain a placeholder-like token (e.g. "_ELLIPSIS_") is left untouched.
    text = re.sub(r'(\.\.\.|[.,!?()"])', r' \1 ', text)
    # Collapse the padding added above together with any original whitespace.
    text = re.sub(r'\s+', ' ', text).strip()
    return text.lower()
+"""Pre-processing"""
+# Convert DataFrame to documents
# Wrap each row's 'Story' text in a Document, carrying the 'Champion' and
# 'Role' columns along as metadata.
# NOTE(review): `df1` is not created anywhere in the visible part of this
# file -- confirm it is loaded before this point.
documents = [
    Document(
        page_content=row['Story'],  # Text of the biography
        metadata={
            'champion_name': row['Champion'],
            'role': row['Role'],
        },
    )
    for _, row in df1.iterrows()
]
+"""Chunking"""
# Tokenizer used to measure chunk length; it is the same model id that the
# inference client in this file talks to.
EMBEDDING_MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
tokenizer_name = EMBEDDING_MODEL_NAME
# Split on token counts rather than characters.
text_splitter = TokenTextSplitter.from_huggingface_tokenizer(
    tokenizer=AutoTokenizer.from_pretrained(tokenizer_name),
    chunk_size=300,
    chunk_overlap=30,
)
# `chunks` keeps the original text for LLM generation; `chunks_bm25` is an
# independent deep copy whose text is normalised for the BM25 retriever.
chunks = text_splitter.split_documents(documents)
chunks_bm25 = copy.deepcopy(chunks)
# Both lists share the same order, so stamping the position into each pair's
# metadata lets a BM25 hit be traced back to its original chunk.
for i, (doc, doc_bm25) in enumerate(zip(chunks, chunks_bm25)):
    doc_bm25.page_content = preprocess_for_bm25(doc_bm25.page_content)
    doc_bm25.metadata["index"] = i
    doc.metadata["index"] = i
"""Retriever"""
# k=2: return the two best-matching chunks per query.
bm25_retriever = BM25Retriever.from_documents(chunks_bm25, k=2)
+"""Chain"""
+from langchain_core.runnables.passthrough import RunnablePassthrough
+from langchain.prompts import ChatPromptTemplate
+from langchain_core.output_parsers.string import StrOutputParser
+from langchain_community.llms.huggingface_hub import HuggingFaceHub
+import os
+from langchain_core.runnables import RunnableLambda
# This must be a plain string, not an f-string: {context} and {question} are
# placeholders for ChatPromptTemplate to fill in later. With an f-prefix
# Python would try to evaluate them right here, where no such variables exist.
prompt = """
You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game.
Instructions:
1. **Only use the context provided below** to answer the question. You should reference the context directly to ensure your answer is as relevant as possible.
2. If the question is outside the scope of League of Legends lore, respond by saying: *"Please ask something related to League of Legends lore."*
3. If the provided context does not provide a clear answer or you're unsure, respond by saying: *"I'm unsure based on the provided context."*
Context: {context}
Question: {question}
Answer:
"""
prompt_template = ChatPromptTemplate.from_template(prompt)
# LLM handle used by ra() to rephrase the user's question.
llm = HuggingFaceHub(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    model_kwargs={"temperature": 0.1, "max_length": 50, "return_full_text" : False}
)
def ra(user_question):
    """Rephrase a user question with the LLM and retrieve context for it.

    Args:
        user_question: The question as typed by the user.

    Returns:
        A dict with two keys: ``'question'`` holds the LLM-corrected question
        (whitespace-stripped) and ``'context'`` holds the text of the chunks
        BM25 retrieved for it, separated by blank lines.
    """
    # Named differently from the module-level `prompt` so it does not shadow it.
    rephrase_prompt = f"You know things about League of Legends. Please correct the following question for grammar and clarity.Do not give explaination:\n{user_question}\nCorrected question:"
    new_query = llm.invoke(rephrase_prompt).strip()
    # The BM25 index was built from text normalised by preprocess_for_bm25,
    # so the query has to be normalised the same way to match its tokens.
    hits = bm25_retriever.invoke(preprocess_for_bm25(new_query))
    # Hits come from the normalised copy; the shared "index" metadata maps
    # each one back to the original, un-normalised chunk text.
    context = "\n\n".join(
        chunks[hit.metadata["index"]].page_content for hit in hits
    )
    return {'context': context, 'question': new_query}
+# chain = RunnablePassthrough() | RunnableLambda(ra) | prompt_template | client.chat_completion() | StrOutputParser() for notebook
+"""-------------------------------------------------------------------"""
 def respond(
     message,
     temperature,
     top_p,
 ):
+    res = ra(val[1])
+    system_message = f"""
+    You are an expert in League of Legends (LoL) lore. You will only answer questions related to the champions and their stories within the game.
+    Instructions:
+    1. **Only use the context provided below** to answer the question. You should reference the context directly to ensure your answer is as relevant as possible.
+    2. If the question is outside the scope of League of Legends lore, respond by saying: *"Please ask something related to League of Legends lore."*
+    3. If the provided context does not provide a clear answer or you're unsure, respond by saying: *"I'm unsure based on the provided context."*
+    Context: {res['context']}
+    Question: {res['question']}
+    Answer:
+    """
     messages = [{"role": "system", "content": system_message}]
     for val in history:
             messages.append({"role": "user", "content": val[0]})
         if val[1]:
             messages.append({"role": "assistant", "content": val[1]})
     messages.append({"role": "user", "content": message})
     response = ""