import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import gdown
import os

# Page heading shown at the top of the Streamlit app
page_heading = "Text Classification with Hugging Face Transformers"
st.title(page_heading)

# Function to download the model from Google Drive
def download_model_from_drive(file_id, dest_path, force=False):
    """Download one Google Drive file to ``dest_path`` using gdown.

    Streamlit re-executes the whole script on every widget interaction, so a
    file that is already on disk is left untouched unless ``force`` is true.

    Args:
        file_id: Google Drive file id, interpolated into the download URL.
        dest_path: Local path the downloaded file is written to.
        force: When true, download again even if ``dest_path`` already exists.
    """
    # Make sure the target directory exists before anything is written into it.
    parent_dir = os.path.dirname(dest_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Avoid repeating a (potentially large) download on every script rerun.
    if not force and os.path.exists(dest_path):
        return

    url = f'https://drive.google.com/uc?id={file_id}'
    gdown.download(url, dest_path, quiet=False)

# (Google Drive file id, local destination) for every artifact the model needs
drive_artifacts = (
    ('1-V2bEtPR9Y3iBXK9zOR-qM5y9hKiQUnF', 'model/model.safetensors'),
    ('1-T2etSP_k_3j5LzunWq8viKGQCQ5RMr_', 'model/config.json'),
    ('1-cRYNPWqlNNGRxeztympRRfVuy3hWuMY', 'model/tokenizer.json'),
    ('1-t9AhomeH7YIIpAqCGTok8wjvl0tml0F', 'model/vocab.json'),
    ('1-l77_KEdK7GBFjMX_6UXGE-ZTGDraaDm', 'model/merges.txt'),
)

# Fetch the artifacts one after another while a spinner is displayed
with st.spinner("Downloading model..."):
    for drive_id, local_path in drive_artifacts:
        download_model_from_drive(drive_id, local_path)

# Load the model and tokenizer
def _cache_across_reruns(func):
    """Wrap ``func`` so its result is built once and reused across reruns.

    ``st.cache`` is the legacy caching API and has been deprecated in favour
    of ``st.cache_resource`` for objects such as models. The newer API is
    used when the installed Streamlit provides it; the legacy call is kept
    as a fallback for older installations.
    """
    if hasattr(st, "cache_resource"):
        return st.cache_resource(func)
    return st.cache(allow_output_mutation=True)(func)


@_cache_across_reruns
def load_model_and_tokenizer():
    """Load the tokenizer and classification model from the ``model`` directory.

    Returns:
        A ``(tokenizer, model)`` tuple. The result is cached, so repeated
        calls hand back the same objects instead of reloading from disk.
    """
    tokenizer = AutoTokenizer.from_pretrained('model')
    # use_safetensors=True asks from_pretrained to read the .safetensors weights.
    model = AutoModelForSequenceClassification.from_pretrained('model', use_safetensors=True)
    return tokenizer, model

tokenizer, model = load_model_and_tokenizer()

# Input text from user
input_text = st.text_area("Enter the text to classify:")

if st.button("Classify"):
    # Whitespace-only input carries nothing to classify, so treat it as empty.
    if input_text and input_text.strip():
        # Tokenize the input text. truncation=True caps the sequence at the
        # tokenizer's configured maximum so over-long text does not overflow
        # the model's supported input length.
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True)

        # Perform classification without tracking gradients (inference only)
        with torch.no_grad():
            outputs = model(**inputs)

        # Get the predicted class: index of the largest logit for the single input
        predicted_class = torch.argmax(outputs.logits, dim=1).item()

        st.write(f"Predicted Class: {predicted_class}")
    else:
        st.write("Please enter some text to classify.")