import os

import gradio as gr
import spaces
import torch
from transformers import pipeline

# load_dotenv()  # enable for local runs with a .env file (requires python-dotenv)
key = os.environ["HF_KEY"]  # Hugging Face token, e.g. set as a Space secret


def load_model():
    print("[INFO] Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="BounharAbdelaziz/XLM-RoBERTa-Morocco",
        token=key,
        device=0,  # on ZeroGPU Spaces, CUDA placement at startup is handled by `spaces`
        torch_dtype=torch.float16,  # half precision to reduce GPU memory use
    )
    print("[INFO] Model loaded successfully!")
    return pipe


print("[INFO] Loading model ...")
pipe = load_model()
print("[INFO] Model loaded")


@spaces.GPU  # request a GPU slot for each call on ZeroGPU Spaces
def predict(text):
    # The fill-mask pipeline returns a list of candidates, each carrying a
    # score and the predicted token string.
    outputs = pipe(text)
    scores = [x["score"] for x in outputs]
    tokens = [x["token_str"] for x in outputs]
    return {label: float(prob) for label, prob in zip(tokens, scores)}


# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box (right-to-left for Arabic script)
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text containing <mask>...",
                rtl=True,
            )
            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
        with gr.Column():
            # Output probabilities, limited to the top 5 predictions
            output_labels = gr.Label(
                label="Prediction Results",
                show_label=False,
                num_top_classes=5,
            )

    # Examples section with caching. Caching requires fn and outputs, so the
    # examples are declared after both components. Fill-mask inputs must
    # contain the model's <mask> token; the placements below are illustrative.
    gr.Examples(
        examples=[
            "العاصمة د <mask> هي الرباط",  # "The capital of <mask> is Rabat"
            "المغرب <mask> زوين",  # "Morocco is <mask> beautiful"
            "انا سميتي مريم، و كنسكن ف <mask> العاصمة دفلسطين",  # "My name is Mariam, and I live in <mask>, the capital of Palestine"
        ],
        inputs=input_text,
        outputs=output_labels,
        fn=predict,
        cache_examples=True,  # precompute example outputs at startup
    )

    # Button actions
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels,
        show_progress="full",  # show a progress indicator
    )
    clear_btn.click(
        lambda: "",
        outputs=input_text,
    )

# Launch the app with a request queue
demo.queue(default_concurrency_limit=3)  # allow 3 concurrent predictions
demo.launch(show_api=False)  # hide the API docs if not needed
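
# For reference, a raw fill-mask pipeline call returns a list of candidate
# completions, roughly of this shape (the values below are illustrative):
#
#   pipe("العاصمة د <mask> هي الرباط")
#   # [{"score": 0.92, "token": 12345, "token_str": "المغرب",
#   #    "sequence": "العاصمة د المغرب هي الرباط"}, ...]
#
# predict() keeps only the (token_str, score) pairs, which gr.Label renders
# as labeled confidence bars.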