# LlamaDutchDemo / app.py
import re
import time

import streamlit as st
import torch
# NOTE: the "conversational" pipeline and the Conversation class have been
# removed from recent transformers releases; this app needs an older version
# (roughly transformers < 4.42).
from transformers import AutoTokenizer, Conversation, pipeline
print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
# Choose a model by setting model_chosen_id to 1 or 2.
model_chosen_id = 2
model_name_options = {
    1: "meta-llama/Llama-2-13b-chat-hf",
    2: "BramVanroy/Llama-2-13b-chat-dutch",
}
model_chosen = model_name_options[model_chosen_id]

my_config = {
    'model_name': model_chosen,
    'do_sample': True,
    'temperature': 0.1,
    'repetition_penalty': 1.1,
    'max_new_tokens': 500,
}
print(f"Selected model: {my_config['model_name']}")
print(f"Parameters are: {my_config}")
def count_words(text):
    # Count words with a simple regular expression (\b\w+\b matches each word).
    words = re.findall(r'\b\w+\b', text)
    return len(words)
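# For example, count_words("Dit is een korte test.") returns 5.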
def generate_with_llama_chat(my_config):
    # Read the generation parameters from the config dict, with defaults.
    do_sample = my_config.get('do_sample', True)
    temperature = my_config.get('temperature', 0.1)
    repetition_penalty = my_config.get('repetition_penalty', 1.1)
    max_new_tokens = my_config.get('max_new_tokens', 500)

    start_time = time.time()
    model = my_config['model_name']
    tokenizer = AutoTokenizer.from_pretrained(model)
    chatbot = pipeline(
        "conversational",
        model=model,
        tokenizer=tokenizer,
        do_sample=do_sample,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        # max_length=2000,
        max_new_tokens=max_new_tokens,
        # model_kwargs={"device_map": "auto", "load_in_8bit": True}
    )
    # Note: passing "src_lang"/"tgt_lang" via model_kwargs does not work for
    # this pipeline.
    elapsed_time = time.time() - start_time
    print(f"Loading the model: {elapsed_time:.1f} seconds")
    return chatbot
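# Optional sketch (an assumption, not part of the original app): on recent
# Streamlit versions the expensive model load can be cached across script
# reruns with st.cache_resource instead of manual session_state bookkeeping:
#
#   @st.cache_resource
#   def load_chatbot(config):
#       return generate_with_llama_chat(config)
#
#   llm_chatbot = load_chatbot(my_config)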
def get_answer(chatbot, input_text):
    start_time = time.time()
    print(f"Processing the input\n {input_text}\n")
    print('Processing the answer....')
    conversation = Conversation(input_text)
    print(f"Conversation(input_text): {conversation}")
    # The pipeline appends the model's reply to the conversation: message 0 is
    # the user prompt, message 1 is the generated answer.
    output = chatbot(conversation)[1]['content']
    elapsed_time = time.time() - start_time
    # Append timing and word-count info to the answer.
    output += f"\nAnswered in {elapsed_time:.1f} seconds, Nr generated words: {count_words(output)}"
    return output
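# A minimal usage sketch outside Streamlit (the Dutch prompt is illustrative):
#
#   bot = generate_with_llama_chat(my_config)
#   print(get_answer(bot, "Wat is de hoofdstad van Nederland?"))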
if "model_name" not in st.session_state.keys():
# Initialize the model with the default option
st.session_state["model_name"] = "BramVanroy/Llama-2-13b-chat-dutch"
my_config.update({'model_name': st.session_state["model_name"]})
llm_chatbot = generate_with_llama_chat(my_config)
st.session_state["model"] = llm_chatbot
text = st.text_area("Enter your prompt here.")
if text:
    out = get_answer(st.session_state["model"], text)
    st.write(out)
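# To try this locally (assuming a CUDA GPU with enough memory for a 13B model,
# and a transformers release that still ships the conversational pipeline;
# the version pin below is approximate):
#
#   pip install streamlit torch "transformers<4.42"
#   streamlit run app.py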