valencar committed on
Commit b4fc608 · 1 Parent(s): 703c58c
Files changed (1)
  1. app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import time
+ import datetime
+
+ import streamlit as st
+
+ # Only the last assignment of `question` is used below.
+ question = "Name the planets in the solar system? A: "
+
+ question = "Quais são os planetas do sistema solar?"   # "What are the planets of the solar system?"
+
+ question = "Qual é o maior planeta do sistema solar?"  # "What is the largest planet of the solar system?"
+
+
+ before = datetime.datetime.now()
+
+ # Use a pipeline as a high-level helper
+ from transformers import pipeline
+
+ messages = [
+     {"role": "user", "content": question},
+ ]
+
+ print('generating the output...')
+ pipe = pipeline("text-generation", model="01-ai/Yi-1.5-34B-Chat")
+
+ output = pipe(messages)
+
+ st.write(output)
+
+
+ # print('tokenizing...')
+
+ # tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
+
+ # print('tokenized.')
+
+ # print('loading the model...')
+ # # Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM.
+ # model = AutoModelForCausalLM.from_pretrained(
+ #     model_path,
+ #     device_map="auto",
+ #     torch_dtype='auto'
+ # ).eval()
+ # print('model loaded.')
+
+ # # Prompt content: "hi"
+ # messages = [
+ #     {"role": "user", "content": question}
+ # ]
+
+ # print('tokenizing the prompt...')
+ # input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, return_tensors='pt')
+ # print('prompt tokenized.')
+
+ # print('generating the output...')
+ # output_ids = model.generate(input_ids, eos_token_id=tokenizer.eos_token_id,
+ #                             max_new_tokens=10)  # 10  # 45
+ # # max_new_tokens=22)
+ print('output generated.')
+
+ # print('decoding the output...')
+ # response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+ # print('output decoded.')
+
+ # Model response: "Hello! How can I assist you today?"
+ # print(response)
+
+ # question = output['choices'][0]['text'].split('A:')[0]
+ # answer = output['choices'][0]['text'].split('A:')[1]
+ # answer = 'A: ' + answer
+
+ # pipeline() returns a list of dicts; "generated_text" holds the generated result.
+ response = output[0]["generated_text"]
+
+ print('\n\n')
+ print(question)
+ print(response)
+
+ after = datetime.datetime.now()
+
+ current_time = (after - before)  # a datetime.timedelta
+ print("\nTime Elapsed: ", current_time)