import os

import chromadb
import streamlit as st
from chromadb.utils import embedding_functions
from huggingface_hub import login
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer

# Authenticate with the Hugging Face Hub (Llama 2 is a gated model, so a
# token with access approval is required). Read it from the HF_TOKEN
# environment variable rather than hard-coding it in the source.
login(token=os.getenv("HF_TOKEN"))

# Load the Llama 2 chat model and its tokenizer
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf")

# Initialize a persistent ChromaDB client (the legacy
# Settings(chroma_db_impl="duckdb+parquet") form was removed in newer
# chromadb releases; PersistentClient is the current equivalent)
client = chromadb.PersistentClient(path="./chroma_db")

# Create (or fetch) a collection for supply chain and green environment data
collection = client.get_or_create_collection(
    name="supply_chain_green_environment",
    embedding_function=embedding_functions.SentenceTransformerEmbeddingFunction(
        model_name="all-mpnet-base-v2"
    ),
)

# Sentence transformer used to embed user questions for retrieval; it must
# match the model used by the collection's embedding function
embedding_model = SentenceTransformer("all-mpnet-base-v2")

# Streamlit app title
st.title("Supply Chain & Green Environment Chatbot")

# User input for questions
user_question = st.text_input("Enter your question:")

# Chat history, persisted across Streamlit reruns
if "chat_history" not in st.session_state:
    st.session_state.chat_history = []

# Process user input and generate a response
if user_question:
    # Generate an embedding for the user question
    question_embedding = embedding_model.encode(user_question).tolist()

    # Search the ChromaDB collection; query_embeddings expects a list of
    # embeddings, so wrap the single query vector in a list
    results = collection.query(
        query_embeddings=[question_embedding],
        n_results=3,
    )

    # Concatenate the retrieved documents into a context string
    context = "\n".join(results["documents"][0])

    # Generate a response from the Llama model. max_new_tokens bounds only
    # the generated answer; max_length would count the prompt tokens too and
    # can silently truncate generation when the retrieved context is long.
    prompt = f"Context: {context}\n\nQuestion: {user_question}\n\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs, max_new_tokens=256)

    # Decode only the newly generated tokens, skipping the echoed prompt
    response = tokenizer.decode(
        outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True
    )

    # Add the user question and bot response to the chat history
    st.session_state.chat_history.append({"user": user_question, "bot": response})

# Display the chat history
for message in st.session_state.chat_history:
    st.write(f"**User:** {message['user']}")
    st.write(f"**Bot:** {message['bot']}")
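
# ---------------------------------------------------------------------------
# Optional seeding sketch (an addition, not part of the original app): the
# retrieval step above only returns useful context if the collection already
# holds documents. The guard below adds two illustrative placeholder
# documents when the collection is empty, so queries after a fresh start
# have something to retrieve; replace the placeholder ids and texts with
# real supply chain / sustainability documents.
# ---------------------------------------------------------------------------
if collection.count() == 0:
    collection.add(
        ids=["seed-1", "seed-2"],
        documents=[
            "Consolidating shipments and optimizing delivery routes can "
            "reduce transport emissions across a supply chain.",
            "Supplier audits and third-party certifications help verify "
            "sustainable sourcing practices.",
        ],
    )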