polpoDevs committed on
Commit
01df37e
·
verified ·
1 Parent(s): a4e89a2

Create app.py

Browse files

Demo App Llama Dutch

Files changed (1) hide show
  1. app.py +69 -0
app.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re
2
+ import time
3
+ import streamlit as st
4
+ from transformers import pipeline, Conversation, AutoTokenizer
5
+ from langdetect import detect
6
+
7
# Pick the checkpoint to run by setting model_chosen_id to 1 or 2.
model_chosen_id = 2
model_name_options = {
    1: "meta-llama/Llama-2-13b-chat-hf",
    2: "BramVanroy/Llama-2-13b-chat-dutch",
}
model_chosen = model_name_options[model_chosen_id]

# Model name plus the generation settings read by generate_with_llama_chat.
my_config = dict(
    model_name=model_chosen,
    do_sample=True,
    temperature=0.1,
    repetition_penalty=1.1,
    max_new_tokens=500,
)
print(f"Selected model: {my_config['model_name']}")
print(f"Parameters are: {my_config}")
18
+
19
def count_words(text):
    """Return the number of words in text.

    A word is a maximal run of word characters delimited by word
    boundaries, as matched by the regular expression below.
    """
    word_pattern = r'\b\w+\b'
    # Tally matches lazily instead of materializing the list of words.
    return sum(1 for _ in re.finditer(word_pattern, text))
23
+
24
def generate_with_llama_chat(my_config):
    """Build a "conversational" pipeline for the configured model.

    Args:
        my_config: Dict with a required 'model_name' key and optional
            'do_sample', 'temperature', 'repetition_penalty' and
            'max_new_tokens' keys (defaults: True, 0.1, 1.1, 500).

    Returns:
        The pipeline object created by transformers.pipeline.

    Raises:
        KeyError: If 'model_name' is missing from my_config.
    """
    # Generation settings, falling back to the defaults when a key is absent.
    generation_settings = {
        'do_sample': my_config.get('do_sample', True),
        'temperature': my_config.get('temperature', 0.1),
        'repetition_penalty': my_config.get('repetition_penalty', 1.1),
        'max_new_tokens': my_config.get('max_new_tokens', 500),
    }

    load_started = time.time()
    checkpoint = my_config['model_name']
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)

    # NOTE: adding "src_lang"/"tgt_lang" to model_kwargs was tried and does
    # not work; a max_length=2000 cap was also left disabled.
    chatbot = pipeline(
        "conversational",
        model=checkpoint,
        tokenizer=tokenizer,
        model_kwargs={"device_map": "auto", "load_in_8bit": True},
        **generation_settings,
    )

    elapsed_time = time.time() - load_started
    print(f"Loading the model: {elapsed_time} seconds")
    return chatbot
47
+
48
def get_answer(chatbot, input_text):
    """Send one user message to the chatbot and return the annotated reply.

    Args:
        chatbot: Callable that accepts a Conversation and returns an object
            indexable by message position (the pipeline built by
            generate_with_llama_chat).
        input_text: The user's message.

    Returns:
        The reply text followed by a line stating how many seconds the
        answer took and how many words the reply contains.
    """
    start_time = time.time()
    print(f"Processing the input\n {input_text}\n")
    print('Processing the answer....')
    conversation = Conversation(input_text)
    print(f"Conversation(input_text): {conversation}")
    # Presumably index 0 is the user turn and index 1 the generated reply
    # -- TODO confirm against the installed transformers version.
    output = (chatbot(conversation))[1]['content']

    # Measure the duration here: elapsed_time was previously referenced
    # without ever being assigned, which raised NameError on every call.
    elapsed_time = time.time() - start_time

    # Append the timing/word-count summary; the count covers the reply only.
    output += f"\nAnswered in {elapsed_time:.1f} seconds, Nr generated words: {count_words(output)}"

    return output
60
+
61
+
62
+
63
+
64
@st.cache_resource
def _load_chatbot():
    """Load the chat pipeline once and reuse it across Streamlit reruns.

    Streamlit re-executes the script on every widget interaction; without
    caching, the model would be loaded again for every submitted text.
    """
    return generate_with_llama_chat(my_config)


chatbot = _load_chatbot()
text = st.text_area("Enter text to summarize here.")

if text:
    out = get_answer(chatbot, text)
    # out is free-form text, not serialized JSON, so render it as text
    # rather than handing it to st.json (which parses string input as JSON).
    st.write(out)