---
license: apache-2.0
---

## INFERENCE CODE

```bash
pip install "transformers[torch]"
```

```python
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
import torch
import time

tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/DB-Summarizer")
model = AutoModelForCausalLM.from_pretrained("AquilaX-AI/DB-Summarizer")

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

question = "How many vulnerabilities were found today?"
db_result = "243"

# Build the ChatML-style prompt the model expects.
summ_inp = f"""<|im_start|>system
Generate a clear and accurate response based on the user's question and the database output.<|im_end|>
<|im_start|>user
user_question: {question}
db_response: {db_result}<|im_end|>
<|im_start|>assistant
"""

start = time.time()

encodeds = tokenizer(summ_inp, return_tensors="pt", truncation=True).input_ids.to(device)

# Stream tokens to stdout as they are generated, skipping the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt=True)

response = model.generate(
    input_ids=encodeds,
    streamer=text_streamer,
    max_new_tokens=512,
    use_cache=True,
    pad_token_id=151645,  # 151645 is the <|im_end|> token id
    eos_token_id=151645,
    num_return_sequences=1,
)

end = time.time()
print(f"Time taken: {end - start:.2f}s")
```
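If you want the summary as a plain string rather than streamed output, a short continuation of the snippet above decodes only the newly generated tokens (the names `response` and `encodeds` come from the block above):

```python
# Slice off the prompt tokens, then decode just the model's answer.
prompt_len = encodeds.shape[-1]
summary = tokenizer.decode(response[0][prompt_len:], skip_special_tokens=True)
print(summary)
```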
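As a minimal alternative sketch, assuming the tokenizer ships a ChatML chat template (the `151645` `<|im_end|>` token id suggests a Qwen-family base), the prompt can be built without hand-writing the special tokens; if the tokenizer defines no template, fall back to the manual prompt above:

```python
# Assumption: tokenizer.chat_template is defined for this model.
messages = [
    {
        "role": "system",
        "content": "Generate a clear and accurate response based on the user's question and the database output.",
    },
    {"role": "user", "content": f"user_question: {question}\ndb_response: {db_result}"},
]

# Returns the prompt as input ids, with the assistant turn opened for generation.
encodeds = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)
```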