import time

import gradio as gr
from transformers import pipeline, AutoTokenizer
#"meta-llama/Llama-2-13b-chat-hf" | |
my_config = {'model_name': "BramVanroy/Llama-2-13b-chat-dutch", 'do_sample': True, 'temperature': 0.1, 'repetition_penalty': 1.1, 'max_new_tokens': 500} | |
print(f"Loading the model: {my_config['model_name']}....") | |
time_load_model_start = time.time() | |
print(time_load_model_start) | |
# Load the model and tokenizer once, outside of the request handler
llm = pipeline(
    "text-generation",
    model=my_config['model_name'],
    tokenizer=AutoTokenizer.from_pretrained(my_config['model_name']),
    do_sample=my_config['do_sample'],
    temperature=my_config['temperature'],
    repetition_penalty=my_config['repetition_penalty'],
    max_new_tokens=my_config['max_new_tokens'],
)
time_load_model_end = time.time()
elapsed_time = time_load_model_end - time_load_model_start
print(f"Elapsed time to load the model: {elapsed_time:.2f} sec")
def count_words(text):
    """Rough word count of the generated text."""
    return len(text.split())

def get_answer(chatbot, input_text):
    """Run a single user turn through the pipeline and return the generated answer."""
    start_time = time.time()
    print(f"Processing the input:\n{input_text}\n")
    print("Generating the answer....")
    # Chat-format input for the text-generation pipeline; this replaces the
    # deprecated transformers Conversation helper used in earlier versions.
    messages = [{"role": "user", "content": input_text}]
    result = chatbot(messages)
    # With chat-format input, "generated_text" holds the full message list;
    # the last message is the assistant's reply.
    output = result[0]["generated_text"][-1]["content"]
    elapsed_time = time.time() - start_time
    # Append the timing and word-count statistics to the returned answer.
    output += f"\nAnswered in {elapsed_time:.1f} seconds, number of generated words: {count_words(output)}"
    return output
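# Optional smoke test (a sketch; the Dutch prompt is only an example):
#   print(get_answer(llm, "Hallo, hoe gaat het?"))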
# Wire the loaded pipeline into a simple text-in/text-out Gradio interface.
# gr.Interface passes only the input value to fn, so bind `llm` here.
demo = gr.Interface(fn=lambda text: get_answer(llm, text), inputs="text", outputs="text")
demo.launch()
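# Alternative: a chat-style UI with gr.ChatInterface (a sketch). gr.ChatInterface
# calls its fn with (message, history); the history is ignored here, so each turn
# is answered without prior context.
#
#   def chat_fn(message, history):
#       return get_answer(llm, message)
#
#   gr.ChatInterface(chat_fn).launch()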