# from transformers import AutoModelForCausalLM, AutoTokenizer
import time
import datetime

import streamlit as st

question = "Name the planets in the solar system? A: "
question = "Quais são os planetas do sistema solar?"   # "What are the planets of the solar system?"
question = "Qual é o maior planeta do sistema solar?"  # "What is the largest planet in the solar system?"

before = datetime.datetime.now()

# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

tokenizer = AutoTokenizer.from_pretrained("01-ai/Yi-1.5-6B-Chat")
model = AutoModelForCausalLM.from_pretrained("01-ai/Yi-1.5-6B-Chat")

st.write('tokenizing...')
prompt = "Hey, are you conscious? Can you talk to me?"
inputs = tokenizer(prompt, return_tensors="pt")

# Generate
st.write('generating the output...')
generate_ids = model.generate(inputs.input_ids, max_length=30)
output = tokenizer.batch_decode(
    generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False
)[0]
st.write('output generated')
st.write(output)

# Use a pipeline as a high-level helper
# from transformers import pipeline
# messages = [
#     {"role": "user", "content": question},
# ]
# print('generating the output...')
# st.write('generating the output...')
# pipe = pipeline("text-generation", model="01-ai/Yi-1.5-34B-Chat")
# st.write('pipeline...')
# output = pipe(messages)
# st.write('output generated...')
# st.write(output)

# print('tokenizing...')
# tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)
# print('tokenized.')
# print('loading the model...')
# # Since transformers 4.35.0, the GPT-Q/AWQ model can be loaded using AutoModelForCausalLM.
# model = AutoModelForCausalLM.from_pretrained(
#     model_path,
#     device_map="auto",
#     torch_dtype='auto'
# ).eval()
# print('model loaded.')

# # Prompt content: "hi"
# messages = [
#     {"role": "user", "content": question}
# ]
# print('tokenizing the prompt...')
# input_ids = tokenizer.apply_chat_template(conversation=messages, tokenize=True, return_tensors='pt')
# print('prompt tokenized.')
# print('generating the output...')
# output_ids = model.generate(input_ids, eos_token_id=tokenizer.eos_token_id,
#                             max_new_tokens=10)  # 10 # 45
#                             # max_new_tokens=22)
# print('output generated.')
# print('decoding the output...')
# response = tokenizer.decode(output_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
# print('output decoded.')

# Model response: "Hello! How can I assist you today?"
# print(response)

# question = output['choices'][0]['text'].split('A:')[0]
# answer = output['choices'][0]['text'].split('A:')[1]
# answer = 'A: ' + answer

print('\n\n')
print(question)
print(output)  # the active path above answers `prompt`, not `question`

after = datetime.datetime.now()
current_time = after - before  # a timedelta; .strftime("%H:%M:%S") does not apply here
print("\nTime Elapsed: ", current_time)
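
# --- Optional sketch: answering `question` through the chat template ---
# A minimal sketch, kept commented out like the alternatives above. It assumes
# the "01-ai/Yi-1.5-6B-Chat" tokenizer/model already loaded in this script and
# reuses the apply_chat_template approach shown in the commented block; names
# such as `answer_ids` and `answer` are illustrative, not part of any API.
# messages = [{"role": "user", "content": question}]
# input_ids = tokenizer.apply_chat_template(
#     conversation=messages,
#     tokenize=True,
#     add_generation_prompt=True,
#     return_tensors="pt",
# )
# answer_ids = model.generate(input_ids, max_new_tokens=64)
# # Decode only the newly generated tokens, skipping the prompt tokens.
# answer = tokenizer.decode(answer_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
# st.write(answer)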