import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import transformers
import torch
from huggingface_hub import login
from langchain_community.llms import HuggingFacePipeline

# Authenticate with the Hugging Face Hub if the gated Llama-2 weights require it.
# login(token=token)


def greet(name):
    # Placeholder Gradio handler: parses the input as an integer and adds 10.
    return str(int(name) + 10)


# Load the model and tokenizer directly.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

# Alternative checkpoints:
# model = "meta-llama/Llama-2-13b-chat-hf"  # or meta-llama/Llama-2-7b-hf
# tokenizer = AutoTokenizer.from_pretrained(model, use_auth_token=True)

# Llama-2 is a causal (decoder-only) model, so the pipeline task is
# "text-generation", not "text2text-generation". do_sample=True is required
# for temperature/top_p to take effect.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    do_sample=True,
    temperature=0.5,
    top_p=0.95,
    repetition_penalty=1.15,
)

# Wrap the transformers pipeline so it can be used as a LangChain LLM.
local_llm = HuggingFacePipeline(pipeline=pipe)

# def get_llama_response(prompt: str) -> None:
#     """
#     Generate a response from the Llama model.
#
#     Parameters:
#         prompt (str): The user's input/question for the model.
#
#     Returns:
#         None: Prints the model's response.
#     """
#     sequences = pipe(
#         prompt,
#         do_sample=True,
#         top_k=10,
#         num_return_sequences=1,
#         eos_token_id=tokenizer.eos_token_id,
#         max_length=256,
#         truncation=True,
#     )
#     print("Chatbot:", sequences[0]['generated_text'])
#
# prompt = 'I liked "Breaking Bad" and "Band of Brothers". Do you have any recommendations of other shows I might like?\n'
# get_llama_response(prompt)

print('hhh')

# Launch a minimal Gradio app; for now it only exposes the greet() placeholder.
iface = gr.Interface(fn=greet, inputs="text", outputs="text")
iface.launch()
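
# A minimal sketch, not part of the original script: local_llm defined above is
# never actually called. Assuming a recent LangChain, the standard Runnable
# entry point `invoke` would run a prompt through the local pipeline, e.g.:
#
# def ask_llm(prompt: str) -> str:
#     # Returns the generated continuation as a plain string.
#     return local_llm.invoke(prompt)
#
# print(ask_llm('I liked "Breaking Bad" and "Band of Brothers". Any recommendations?'))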