import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import transformers
import torch
from huggingface_hub import login
from langchain_community.llms import HuggingFacePipeline

# Authenticate with the Hugging Face Hub if the gated Llama-2 weights require it.
# login(token=token)


def greet(name):
    # Placeholder Gradio handler: parses the input as an integer and adds 10.
    return str(int(name) + 10)


# Load the model and tokenizer directly.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

# Alternative checkpoints:
# model = "meta-llama/Llama-2-13b-chat-hf"  # or meta-llama/Llama-2-7b-hf
# tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)

# Llama-2 is a causal (decoder-only) model, so the pipeline task is
# "text-generation", not "text2text-generation". do_sample=True is required
# for temperature/top_p to take effect.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    do_sample=True,
    temperature=0.5,
    top_p=0.95,
    repetition_penalty=1.15,
)

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

# def get_llama_response(prompt: str) -> None:
#     """
#     Generate a response from the Llama model.
#
#     Parameters:
#         prompt (str): The user's input/question for the model.
#
#     Returns:
#         None: Prints the model's response.
#     """
#     sequences = pipe(
#         prompt,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=tokenizer.eos_token_id,
#         max_length=256,
#         truncation=True,
#     )
#     print("Chatbot:", sequences[0]['generated_text'])
#
# prompt = 'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n'
# get_llama_response(prompt)

print('hhh')

# Launch a minimal Gradio app; for now it only exposes the greet() placeholder.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
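
# A minimal sketch, not part of the original script: local_llm defined above is
# never actually called. Assuming a recent LangChain, the standard Runnable
# entry point `invoke` would run a prompt through the local pipeline, e.g.:
#
# def ask_llm(prompt: str) -> str:
#     # Returns the generated continuation as a plain string.
#     return local_llm.invoke(prompt)
#
# print(ask_llm('I liked "Breaking Bad" and "Band of Brothers". Any recommendations?'))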