Commit 5d99c07 (parent: c5c1df2)
feat: adding mistral model again

Changed files:
- backend/controller.py +14 -3
- explanation/interpret_captum.py +38 -0
- explanation/markup.py +1 -1
- explanation/visualize.py +1 -1
- main.py +50 -27
- model/mistral.py +105 -0
- requirements.txt +1 -0
- utils/modelling.py +25 -0
backend/controller.py
CHANGED
@@ -6,6 +6,8 @@ import gradio as gr
 
 # internal imports
 from model import godel
+from model import mistral
+from utils import modelling as mdl
 from explanation import interpret_shap as shap_int, visualize as viz
 
 
@@ -17,14 +19,20 @@ def interference(
     knowledge: str,
     system_prompt: str,
     xai_selection: str,
+    model_selection: str,
 ):
     # if no proper system prompt is given, use a default one
-    if system_prompt in (
+    if system_prompt in ("", " "):
         system_prompt = """
             You are a helpful, respectful and honest assistant.
             Always answer as helpfully as possible, while being safe.
             """
 
+    if model_selection.lower() == "mistral":
+        model = mistral
+    else:
+        model = godel
+
     # if a XAI approach is selected, grab the XAI module instance
     if xai_selection in ("SHAP", "Attention"):
         # matching selection
@@ -44,7 +52,7 @@ def interference(
 
     # call the explained chat function with the model instance
     prompt_output, history_output, xai_graphic, xai_markup = explained_chat(
-        model=
+        model=model,
         xai=xai,
         message=prompt,
         history=history,
@@ -55,7 +63,7 @@ def interference(
     else:
         # call the vanilla chat function
         prompt_output, history_output = vanilla_chat(
-            model=
+            model=model,
             message=prompt,
             history=history,
             system_prompt=system_prompt,
@@ -95,6 +103,9 @@ def explained_chat(
     model, xai, message: str, history: list, system_prompt: str, knowledge: str = ""
 ):
     # formatting the prompt using the model's format_prompt function
+    message, history, system_prompt, knowledge = mdl.prompt_limiter(
+        message, history, system_prompt, knowledge
+    )
     prompt = model.format_prompt(message, history, system_prompt, knowledge)
 
     # generating an answer using the methods chat function
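The dispatch added to interference() is small enough to illustrate in isolation. Below is a minimal sketch of that selection logic using stand-in objects so it runs outside the repo; the helper name select_model is hypothetical (in the controller the branch is inline), and in the app the selected object is the godel or mistral module from model/, both expected to expose the same interface (format_prompt, set_config, respond).

# minimal sketch of the model dispatch added to interference(); the
# SimpleNamespace objects stand in for the real `godel` and `mistral` modules
from types import SimpleNamespace

godel = SimpleNamespace(name="GODEL")
mistral = SimpleNamespace(name="Mistral")


def select_model(model_selection: str):
    # lower-casing the radio value so "Mistral", "mistral" and "MISTRAL" all match
    if model_selection.lower() == "mistral":
        return mistral
    return godel


assert select_model("Mistral") is mistral
assert select_model("GODEL") is godel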
explanation/interpret_captum.py
ADDED
@@ -0,0 +1,38 @@
+# external imports
+from captum.attr import LLMAttribution, TextTokenInput, KernelShap
+import torch
+
+# internal imports
+from utils import formatting as fmt
+from .markup import markup_text
+
+
+# main explain function that returns a chat with explanations
+def chat_explained(model, prompt):
+    model.set_config({})
+
+    # creating llm attribution class with KernelSHAP and Mistral model, tokenizer
+    llm_attribution = LLMAttribution(KernelShap(model.MODEL), model.TOKENIZER)
+
+    # generation attribution
+    attribution_input = TextTokenInput(prompt, model.TOKENIZER)
+    attribution_result = llm_attribution.attribute(attribution_input)
+
+    # extracting values and input tokens
+    values = attribution_result.seq_attr.to(torch.device("cpu")).numpy()
+    input_tokens = fmt.format_tokens(attribution_result.input_tokens)
+
+    # raising error if mismatch occurs
+    if len(attribution_result.input_tokens) != len(values):
+        raise RuntimeError("values and input len mismatch")
+
+    # getting response text, graphic placeholder and marked text object
+    response_text = fmt.format_output_text(attribution_result.output_tokens)
+    graphic = (
+        "<div style='text-align: center; font-family:arial;'><h4>Attention"
+        " Interpretation with Captum doesn't support an interactive graphic.</h4></div>"
+    )
+    marked_text = markup_text(input_tokens, values, variant="captum")
+
+    # return response, graphic and marked_text array
+    return response_text, graphic, marked_text
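The new module follows Captum's LLM attribution workflow. A rough, self-contained sketch of that workflow is shown below, assuming captum >= 0.7; a small GPT-2 model stands in for the Mistral instance that chat_explained receives via model.MODEL / model.TOKENIZER.

# sketch of the Captum LLM-attribution pattern used above, on a small model
import torch
from captum.attr import KernelShap, LLMAttribution, TextTokenInput
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# wrap a perturbation-based attribution method for use with a generative LM
llm_attribution = LLMAttribution(KernelShap(model), tokenizer)

# attribute the generated continuation back to the input tokens
attribution_input = TextTokenInput("The capital of France is", tokenizer)
attribution_result = llm_attribution.attribute(attribution_input)

# one attribution value per input token, as in chat_explained above
values = attribution_result.seq_attr.to(torch.device("cpu")).numpy()
print(list(zip(attribution_result.input_tokens, values)))
print(attribution_result.output_tokens)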
explanation/markup.py
CHANGED
@@ -18,7 +18,7 @@ def markup_text(input_text: list, text_values: ndarray, variant: str):
     if variant == "shap":
         text_values = np.transpose(text_values)
         text_values = fmt.flatten_attribution(text_values)
-
+    elif variant == "visualizer":
         text_values = fmt.flatten_attention(text_values)
 
     # Determine the minimum and maximum values
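For context, the variant argument now distinguishes three attribution sources (SHAP, the attention visualizer, and Captum). The stand-in sketch below shows the dispatch only; the real flatteners live in utils/formatting.py and their exact behaviour is assumed here.

import numpy as np

# stand-in sketch of the per-variant preprocessing in markup_text
def preprocess_values(text_values: np.ndarray, variant: str) -> np.ndarray:
    if variant == "shap":
        # assumed: SHAP values arrive as output tokens x input tokens and are
        # transposed, then collapsed to one value per input token
        text_values = np.transpose(text_values).sum(axis=1)
    elif variant == "visualizer":
        # assumed: attention weights are flattened to one value per input token
        text_values = np.asarray(text_values).flatten()
    # the "captum" variant already delivers one value per input token
    return text_values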
explanation/visualize.py
CHANGED
@@ -3,7 +3,7 @@
 
 # internal imports
 from utils import formatting as fmt
-from model.
+from model.model import CONFIG
 from .markup import markup_text
 
 
main.py
CHANGED
@@ -97,31 +97,40 @@ with gr.Blocks(
     """)
     # row with columns for the different settings
     with gr.Row(equal_height=True):
-        … (25 removed lines: the previous settings layout; content truncated in the source)
+        # column that takes up 3/4 of the row
+        with gr.Column(scale=2):
+            # textbox to enter the system prompt
+            system_prompt = gr.Textbox(
+                label="System Prompt",
+                info="Set the model's system prompt, dictating how it answers.",
+                # default system prompt is set to this in the backend
+                placeholder=(
+                    "You are a helpful, respectful and honest assistant. Always"
+                    " answer as helpfully as possible, while being safe."
+                ),
+            )
+        # column that takes up 1/4 of the row
+        with gr.Column(scale=1):
+            # radio group to select the xai method
+            xai_selection = gr.Radio(
+                ["None", "SHAP", "Attention"],
+                label="Interpretability Settings",
+                info="Select an Interpretability Implementation to use.",
+                value="None",
+                interactive=True,
+                show_label=True,
+            )
+        # column that takes up 1/4 of the row
+        with gr.Column(scale=1):
+            # radio group to select the model
+            model_selection = gr.Radio(
+                ["GODEL", "Mistral"],
+                label="Model Settings",
+                info="Select a Model to use.",
+                value="GODEL",
+                interactive=True,
+                show_label=True,
+            )
 
     # calling info functions on inputs/submits for different settings
     system_prompt.submit(system_prompt_info, [system_prompt])
@@ -247,13 +256,27 @@ with gr.Blocks(
     ## see backend/controller.py for more information
     submit_btn.click(
         interference,
-        [
+        [
+            user_prompt,
+            chatbot,
+            knowledge_input,
+            system_prompt,
+            xai_selection,
+            model_selection,
+        ],
         [user_prompt, chatbot, xai_interactive, xai_text],
     )
     # function triggered by the enter key
     user_prompt.submit(
         interference,
-        [
+        [
+            user_prompt,
+            chatbot,
+            knowledge_input,
+            system_prompt,
+            xai_selection,
+            model_selection,
+        ],
         [user_prompt, chatbot, xai_interactive, xai_text],
    )
 
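The wiring of the two new Radio groups into interference can be reproduced in a standalone sketch: stand-in components and a dummy interference function are used here, while the real callback lives in backend/controller.py and the real layout defines more settings.

# standalone sketch of the new model_selection wiring (Gradio 4.x assumed)
import gradio as gr


# dummy stand-in for backend.controller.interference
def interference(prompt, history, knowledge, system_prompt, xai_selection, model_selection):
    history = (history or []) + [[prompt, f"[{model_selection} / {xai_selection}] echo: {prompt}"]]
    return "", history, None, []


with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    user_prompt = gr.Textbox(label="Prompt")
    knowledge_input = gr.Textbox(label="Knowledge")
    system_prompt = gr.Textbox(label="System Prompt")
    xai_selection = gr.Radio(["None", "SHAP", "Attention"], value="None", label="Interpretability Settings")
    model_selection = gr.Radio(["GODEL", "Mistral"], value="GODEL", label="Model Settings")
    xai_interactive = gr.HTML()
    xai_text = gr.HighlightedText(value=[])
    submit_btn = gr.Button("Submit")

    # same input/output lists as the click/submit handlers above
    submit_btn.click(
        interference,
        [user_prompt, chatbot, knowledge_input, system_prompt, xai_selection, model_selection],
        [user_prompt, chatbot, xai_interactive, xai_text],
    )

demo.launch()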
model/mistral.py
ADDED
@@ -0,0 +1,105 @@
+# Mistral model module for chat interaction and model instance control
+
+# external imports
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+import gradio as gr
+
+# internal imports
+from utils import modelling as mdl
+
+# global model and tokenizer instance (created on initial build)
+device = mdl.get_device()
+if device == torch.device("cuda"):
+    n_gpus, max_memory, bnb_config = mdl.gpu_loading_config()
+
+    MODEL = AutoModelForCausalLM.from_pretrained(
+        "mistralai/Mistral-7B-Instruct-v0.2",
+        quantization_config=bnb_config,
+        device_map="auto",  # dispatch the model efficiently on the available resources
+        max_memory={i: max_memory for i in range(n_gpus)},
+    )
+
+else:
+    MODEL = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
+    MODEL.to(device)
+TOKENIZER = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
+
+# default model config
+CONFIG = {"max_new_tokens": 50, "min_length": 8, "top_p": 0.9, "do_sample": True}
+
+
+# function to (re)set the config
+def set_config(config: dict):
+    global CONFIG
+
+    # if a config dict is given, update it
+    if config != {}:
+        CONFIG = config
+    else:
+        # hard-setting the model config to defaults
+        # needed for SHAP
+        MODEL.config.max_new_tokens = 50
+        MODEL.config.min_length = 8
+        MODEL.config.top_p = 0.9
+        MODEL.config.do_sample = True
+
+
+# advanced formatting function that takes a conversation history into account
+# CREDIT: adapted from Venkata Bhanu Teja Pallakonda in Huggingface discussions
+## see https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1/discussions/
+def format_prompt(message: str, history: list, system_prompt: str, knowledge: str = ""):
+    prompt = ""
+
+    if knowledge != "":
+        gr.Info("""
+            Mistral doesn't support additional knowledge; it will be ignored.
+            """)
+
+    # if no history, use system prompt and example message
+    if len(history) == 0:
+        prompt = f"""<s>[INST] {system_prompt} [/INST] How can I help you today? </s>
+        [INST] {message} [/INST]"""
+    else:
+        # takes the very first exchange and the system prompt as base
+        for user_prompt, bot_response in history[0]:
+            prompt = (
+                f"<s>[INST] {system_prompt} {user_prompt} [/INST] {bot_response}</s>"
+            )
+
+        # takes all the following conversations and adds them as context
+        prompt += "".join(
+            f"[INST] {user_prompt} [/INST] {bot_response}</s>"
+            for user_prompt, bot_response in history[1:]
+        )
+
+    return prompt
+
+
+# function to extract the real answer because Mistral always returns the full prompt
+def format_answer(answer: str):
+    # empty answer string
+    formatted_answer = ""
+
+    # extracting text after the [/INST] tokens
+    parts = answer.split("[/INST]")
+    if len(parts) >= 3:
+        # return the text after the second occurrence of [/INST]
+        formatted_answer = parts[2].strip()
+    else:
+        # return an empty string if there are fewer than two occurrences of [/INST]
+        formatted_answer = ""
+
+    return formatted_answer
+
+
+def respond(prompt: str):
+
+    # tokenizing inputs and configuring the model
+    input_ids = TOKENIZER(f"{prompt}", return_tensors="pt")["input_ids"]
+
+    # generating text with tokenized input, returning the decoded output
+    output_ids = MODEL.generate(input_ids, max_new_tokens=50, generation_config=CONFIG)
+    output_text = TOKENIZER.batch_decode(output_ids)[0]
+
+    return format_answer(output_text)
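Taken together, the module is used roughly as follows. This is a hypothetical usage sketch; downloading and running Mistral-7B-Instruct-v0.2 requires the hardware paths handled in utils/modelling.py.

# hypothetical usage sketch of the new mistral module
from model import mistral

# passing an empty dict falls back to the hard-coded default generation settings
mistral.set_config({})

# build a Mistral instruction prompt from the chat state
prompt = mistral.format_prompt(
    message="What does SHAP stand for?",
    history=[],
    system_prompt="You are a helpful, respectful and honest assistant.",
)
# prompt now looks like:
# <s>[INST] ...system prompt... [/INST] How can I help you today? </s>
#     [INST] What does SHAP stand for? [/INST]

# generate, decode, and strip everything before the answer (text after the
# second [/INST] marker, as handled by format_answer)
answer = mistral.respond(prompt)
print(answer)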
requirements.txt
CHANGED
@@ -2,6 +2,7 @@ gradio~=4.7.1
 transformers~=4.35.2
 torch~=2.1.1
 shap~=0.44.0
+captum
 bertviz~=1.4.0
 accelerate~=0.24.1
 markdown~=3.5.1
utils/modelling.py
CHANGED
@@ -1,7 +1,9 @@
 # modelling util module providing formatting functions for model functionalities
 
 # external imports
+import torch
 import gradio as gr
+from transformers import BitsAndBytesConfig
 
 
 # function that limits the prompt to contain model runtime
@@ -72,3 +74,26 @@ def token_counter(tokenizer, text: str):
     tokens = tokenizer(text, return_tensors="pt").input_ids
     # return the token count
     return len(tokens[0])
+
+
+def get_device():
+    if torch.cuda.is_available():
+        device = torch.device("cuda")
+    else:
+        device = torch.device("cpu")
+
+    return device
+
+
+# building the 4-bit quantization config for loading the model on GPU
+def gpu_loading_config(max_memory: str = "15000MB"):
+    n_gpus = torch.cuda.device_count()
+
+    bnb_config = BitsAndBytesConfig(
+        load_in_4bit=True,
+        bnb_4bit_use_double_quant=True,
+        bnb_4bit_quant_type="nf4",
+        bnb_4bit_compute_dtype=torch.bfloat16,
+    )
+
+    return n_gpus, max_memory, bnb_config
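These two helpers are consumed by the model modules at import time. The sketch below mirrors the loading branch of model/mistral.py from this commit (model name and the 15000MB memory budget are taken from the diff above):

import torch
from transformers import AutoModelForCausalLM
from utils import modelling as mdl

device = mdl.get_device()
if device == torch.device("cuda"):
    # 4-bit NF4 quantization config plus a per-GPU memory budget
    n_gpus, max_memory, bnb_config = mdl.gpu_loading_config(max_memory="15000MB")
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.2",
        quantization_config=bnb_config,
        device_map="auto",
        max_memory={i: max_memory for i in range(n_gpus)},
    )
else:
    # CPU fallback without quantization
    model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
    model.to(device)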