from ctransformers import AutoTokenizer, AutoModelForCausalLM from peft import LoraConfig, get_peft_model import torch # Load the tokenizer and model model = AutoModelForCausalLM.from_pretrained("youssef227/llama-3-8b-Instruct-bnb-telcom-3",hf=True) print("step 1 ") tokenizer = AutoTokenizer.from_pretrained(model) print("step 2 ") def generator(text): inputs = tokenizer( [ alpaca_prompt.format( f" {context}انت ممثل خدمة العملاء لدى شركة فودافون.و دي معلومات ممكن تفيدك", # instruction text, # input "", # output - leave this blank for generation! ) ], return_tensors = "pt").to("cuda") outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True) return tokenizer.batch_decode(outputs) text = st.text_area('enter some text!') if text: out = generator(text) st.json(out)