# GODEL model module for chat interaction and model instance control

# external imports
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, GenerationConfig

# internal imports
from utils import modelling as mdl

# global model and tokenizer instances (created on initial build)
TOKENIZER = AutoTokenizer.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
MODEL = AutoModelForSeq2SeqLM.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")

# model config definition
CONFIG = GenerationConfig.from_pretrained("microsoft/GODEL-v1_1-large-seq2seq")
base_config_dict = {
    "max_new_tokens": 50,
    "min_length": 8,
    "top_p": 0.9,
    "do_sample": True,
}
CONFIG.update(**base_config_dict)


# function to (re)set the generation config
def set_config(config_dict: dict):
    # if no config dict is given, fall back to the defaults
    if not config_dict:
        config_dict = base_config_dict
    CONFIG.update(**config_dict)
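
# usage sketch (illustrative, not part of the original flow): any keyword
# understood by transformers.GenerationConfig can be passed through, e.g.
#   set_config({"max_new_tokens": 80, "top_p": 0.95})
# while passing an empty dict restores the defaults from base_config_dict:
#   set_config({})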


# function to format the input prompt for the model
# CREDIT: Adapted from the official inference example on Hugging Face
## see https://huggingface.co/microsoft/GODEL-v1_1-large-seq2seq
def format_prompt(message: str, history: list, system_prompt: str, knowledge: str = ""):
    # user input prompt initialization
    prompt = ""
    # limits the prompt elements to the maximum token count
    message, history, system_prompt, knowledge = mdl.prompt_limiter(
        TOKENIZER, message, history, system_prompt, knowledge
    )
    # adds knowledge text if not empty
    if knowledge != "":
        knowledge = "[KNOWLEDGE] " + knowledge
    # adds the conversation history to the prompt, turns separated by EOS
    # (trailing space keeps consecutive history pairs from running together)
    for conversation in history:
        prompt += f"EOS {conversation[0]} EOS {conversation[1]} "
    # adds the current message to the prompt
    prompt += message
    # combines the entire prompt
    full_prompt = f"{system_prompt} [CONTEXT] {prompt} {knowledge}"
    # returns the formatted prompt
    return full_prompt
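
# illustrative call (assumption: mdl.prompt_limiter returns its arguments
# unchanged when they fit the token budget); with one history pair,
#   format_prompt(
#       "Any vegan options?",
#       [["Is the cafe open?", "Yes, until 8 pm."]],
#       "Instruction: given a dialog context, respond helpfully.",
#       knowledge="The cafe offers vegan pastries.",
#   )
# returns the single string (wrapped here for readability):
#   "Instruction: given a dialog context, respond helpfully. [CONTEXT]
#    EOS Is the cafe open? EOS Yes, until 8 pm. Any vegan options?
#    [KNOWLEDGE] The cafe offers vegan pastries."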


# response function calling the model and returning the model output message
# CREDIT: Copied from the official inference example on Hugging Face
## see https://huggingface.co/microsoft/GODEL-v1_1-large-seq2seq
def respond(prompt: str):
    # (re)sets the generation config to the defaults
    set_config({})
    # tokenizes the input string
    input_ids = TOKENIZER(prompt, return_tensors="pt").input_ids
    # generates using the config and decodes the output
    outputs = MODEL.generate(input_ids, generation_config=CONFIG)
    output = TOKENIZER.decode(outputs[0], skip_special_tokens=True)
    # returns the model output string
    return output
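

# minimal usage sketch when run as a script (assumption: not part of the
# original module; the instruction and message are adapted from the official
# GODEL example linked above)
if __name__ == "__main__":
    chat_history = []  # list of [user_message, bot_response] pairs
    instruction = "Instruction: given a dialog context, you need to respond empathically."
    user_message = "Does money buy happiness?"
    # format the prompt, query the model, and store the exchange in the history
    model_prompt = format_prompt(user_message, chat_history, instruction)
    answer = respond(model_prompt)
    chat_history.append([user_message, answer])
    print(answer)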