import os

import torch
import gradio as gr
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TextClassificationPipeline,
)

# === Config ===
MODEL_ID = "Omartificial-Intelligence-Space/SA-BERT-Classifier"
HF_TOKEN = os.getenv("HUGGINGFACE_HUB_TOKEN")
DEVICE = 0 if torch.cuda.is_available() else -1

# === Load model and tokenizer ===
# `token` replaces the deprecated `use_auth_token` argument of from_pretrained.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_ID, token=HF_TOKEN
).to("cuda" if DEVICE == 0 else "cpu")

# === Build pipeline ===
# Named `classifier` to avoid shadowing transformers.pipeline.
classifier = TextClassificationPipeline(
    model=model,
    tokenizer=tokenizer,
    device=DEVICE,
    top_k=None,  # return scores for every label (replaces deprecated return_all_scores)
)

# === Inference function ===
def classify_dialect(text):
    # With top_k=None, a single-string call returns [[{"label": ..., "score": ...}, ...]],
    # so index [0] yields the list of per-label score dicts.
    results = classifier(text)[0]
    # Labels follow the "LABEL_<index>" convention, so parse the index out of each name.
    scores = {int(item["label"].split("_")[-1]): item["score"] for item in results}
    p_non_saudi = scores.get(0, 0.0)
    p_saudi = scores.get(1, 0.0)
    prediction = "Saudi Dialect" if p_saudi > p_non_saudi else "Non-Saudi Dialect"
    return round(p_saudi, 4), round(p_non_saudi, 4), prediction

# === Gradio Interface ===
demo = gr.Interface(
    fn=classify_dialect,
    inputs=gr.Textbox(lines=2, placeholder="اكتب جملة باللهجة العربية هنا..."),
    outputs=[
        gr.Label(label="Saudi Dialect (Probability)"),
        gr.Label(label="Non-Saudi Dialect (Probability)"),
        gr.Textbox(label="Final Prediction"),
    ],
    title="🗣️ Saudi Dialect Classifier",
    description=(
        "🔍 نموذج BERT لتصنيف الجمل إلى لهجة سعودية أو غير سعودية.\n\n"
        "👩‍💻 Deployed by **Ayesha Shafique** "
        "[LinkedIn](https://www.linkedin.com/in/aieeshashafique/)\n\n"
        "🌐 Model credit: "
        "[Omartificial-Intelligence-Space](https://huggingface.co/Omartificial-Intelligence-Space)"
    ),
    allow_flagging="never",
)

# === Launch App ===
if __name__ == "__main__":
    demo.launch()
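
# === Quick smoke test (illustrative, commented out) ===
# A minimal sketch for sanity-checking classify_dialect without launching the
# Gradio UI, assuming the model loads and uses the LABEL_0/LABEL_1 scheme
# parsed above. The sample sentence is an illustrative placeholder, not taken
# from the model card. Uncomment to run a one-off prediction:
#
# p_saudi, p_non_saudi, label = classify_dialect("وش تسوي اليوم؟")
# print(f"Saudi: {p_saudi} | Non-Saudi: {p_non_saudi} | Prediction: {label}")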