import time

import gradio as gr
from transformers import pipeline, AutoTokenizer
#"meta-llama/Llama-2-13b-chat-hf" | |
my_config = {'model_name': "BramVanroy/Llama-2-13b-chat-dutch", 'do_sample': True, 'temperature': 0.1, 'repetition_penalty': 1.1, 'max_new_tokens': 500} | |
print(f"Loading the model: {my_config['model_name']}....") | |
time_load_model_start = time.time() | |
print(time_load_model_start) | |
# Load the model and tokenizer once, outside of the request handler
llm = pipeline(
    "text-generation",
    model=my_config['model_name'],
    tokenizer=AutoTokenizer.from_pretrained(my_config['model_name']),
    do_sample=my_config['do_sample'],
    temperature=my_config['temperature'],
    repetition_penalty=my_config['repetition_penalty'],
    max_new_tokens=my_config['max_new_tokens'],
)
time_load_model_end = time.time()
elapsed_time = time_load_model_end - time_load_model_start
print(f"Elapsed time to load the model: {elapsed_time:.2f} sec")
def count_words(text):
    """Rough word count of the generated text."""
    return len(text.split())

def get_answer(chatbot, input_text):
    """Run a single user turn through the pipeline and return the generated answer."""
    start_time = time.time()
    print(f"Processing the input:\n{input_text}\n")
    print("Generating the answer....")
    # Chat-format input for the text-generation pipeline; this replaces the
    # deprecated transformers Conversation helper used in earlier versions.
    messages = [{"role": "user", "content": input_text}]
    result = chatbot(messages)
    # With chat-format input, "generated_text" holds the full message list;
    # the last message is the assistant's reply.
    output = result[0]["generated_text"][-1]["content"]
    elapsed_time = time.time() - start_time
    # Append the timing and word-count statistics to the returned answer.
    output += f"\nAnswered in {elapsed_time:.1f} seconds, number of generated words: {count_words(output)}"
    return output
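# Optional smoke test (a sketch; the Dutch prompt is only an example):
#   print(get_answer(llm, "Hallo, hoe gaat het?"))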
# Wire the loaded pipeline into a simple text-in/text-out Gradio interface.
# gr.Interface passes only the input value to fn, so bind `llm` here.
demo = gr.Interface(fn=lambda text: get_answer(llm, text), inputs="text", outputs="text")
demo.launch()
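# Alternative: a chat-style UI with gr.ChatInterface (a sketch). gr.ChatInterface
# calls its fn with (message, history); the history is ignored here, so each turn
# is answered without prior context.
#
#   def chat_fn(message, history):
#       return get_answer(llm, message)
#
#   gr.ChatInterface(chat_fn).launch()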