import gradio as gr
import torch
from transformers import pipeline
import os
import spaces
from huggingface_hub import CommitScheduler
from pathlib import Path
import uuid
import json
import logging

# Configure logging to both a file and stdout
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("darija-masked-lm")

key = os.environ["HF_KEY"]

# Define the file where to save the data
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
feedback_file = submit_file

# Create the directory if it doesn't exist
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")

# Periodically push collected feedback to a dataset repo on the Hub
scheduler = CommitScheduler(
    repo_id="atlasia/atlaset_inference_ds",
    repo_type="dataset",
    folder_path=submit_file.parent,
    path_in_repo="masked_lm",
    every=5,  # commit every 5 minutes
    token=key,
)
logger.info("Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")


def save_feedback(user_input, output) -> None:
    """
    Append an input/output pair to a JSON Lines file, using the scheduler's
    thread lock to avoid concurrent writes from different users.
    """
    logger.info(f"Saving feedback to {feedback_file}")
    with scheduler.lock:
        try:
            with feedback_file.open("a") as f:
                f.write(json.dumps({
                    "input": user_input,
                    "output": output,
                }))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception as e:
            logger.error(f"Error saving feedback: {str(e)}")


def load_model():
    logger.info("Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="atlasia/XLM-RoBERTa-Morocco",
        token=key,
        device=0,
        torch_dtype=torch.float16,  # use half precision
    )
    logger.info("Model loaded successfully!")
    return pipe


pipe = load_model()


@spaces.GPU
def predict(text):
    # The fill-mask pipeline expects the input to contain the model's
    # mask token (for XLM-RoBERTa: <mask>)
    outputs = pipe(text)
    scores = [x["score"] for x in outputs]
    tokens = [x["token_str"] for x in outputs]
    result = {label: float(prob) for label, prob in zip(tokens, scores)}

    # Log the input/output pair to the feedback file
    save_feedback(text, result)

    return result


# Create the Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box (right-to-left for Arabic script)
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True,
            )

            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")

            # Output probabilities
            output_labels = gr.Label(
                label="Prediction Results",
                show_label=False,
            )

            # Examples section; each example must contain the <mask> token,
            # otherwise the fill-mask pipeline raises an error when the
            # cached examples are computed at startup
            gr.Examples(
                examples=[
                    "العاصمة د<mask> هي الرباط",
                    "المغرب زوين <mask>",
                    "انا سميتي مريم، و كنسكن ف <mask> العاصمة دفلسطين",
                ],
                inputs=input_text,
                fn=predict,
                outputs=output_labels,
                cache_examples=True,
            )

    # Button actions
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels,
    )
    clear_btn.click(
        lambda: "",
        outputs=input_text,
    )

# Launch the app with a simple queue
demo.queue()  # no parameters, for compatibility with older Gradio versions
demo.launch()
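
# Each call to save_feedback() appends one JSON object per line to the
# feedback file, which the CommitScheduler then pushes to the dataset repo.
# A record looks like this (scores are illustrative, not real model output):
#
#   {"input": "العاصمة د<mask> هي الرباط", "output": {"المغرب": 0.92, "مصر": 0.03}}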
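
# A minimal client-side sketch for querying this app once it is running,
# assuming the default local URL and the default api_name that Gradio derives
# from the function name (both are assumptions, not part of the original app):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   result = client.predict("العاصمة د<mask> هي الرباط", api_name="/predict")
#   print(result)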