from transformers import AutoTokenizer, AutoModelForCausalLM from peft import LoraConfig, get_peft_model import torch from peft import PeftModel, PeftConfig from transformers import AutoModelForCausalLM config = PeftConfig.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3") print("step 1 ") base_model = AutoModelForCausalLM.from_pretrained("unsloth/llama-3-8b-Instruct-bnb-4bit") print("step 2") model = PeftModel.from_pretrained(base_model, "youssef227/llama-3-8b-Instruct-bnb-telcom-3") print("step 3") # Load the tokenizer and model # print("step 1 ") # tokenizer = AutoTokenizer.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3") # print("step 2 ") # model = AutoModelForCausalLM.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3") def generator(text): inputs = tokenizer( [ alpaca_prompt.format( f" {context}انت ممثل خدمة العملاء لدى شركة فودافون.و دي معلومات ممكن تفيدك", # instruction text, # input "", # output - leave this blank for generation! ) ], return_tensors = "pt").to("cuda") outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True) return tokenizer.batch_decode(outputs) text = st.text_area('enter some text!') if text: out = generator(text) st.json(out)