import gradio as gr
import torch
from transformers import pipeline
import os
import spaces
from huggingface_hub import CommitScheduler
from pathlib import Path
import uuid
import json
import logging
# Configure logging to both a local file and stdout
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler(),
    ],
)
logger = logging.getLogger("darija-masked-lm")
# Read the Hugging Face token from the environment (a Space secret)
key = os.environ["HF_KEY"]
# Define the file where to save the data; a per-instance UUID avoids clashes
submit_file = Path("user_submit") / f"data_{uuid.uuid4()}.json"
feedback_file = submit_file

# Create the directory if it doesn't exist
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")
scheduler = CommitScheduler(
    repo_id="atlasia/atlaset_inference_ds",
    repo_type="dataset",
    folder_path=submit_file.parent,
    path_in_repo="masked_lm",
    every=5,
    token=key,
)
logger.info("Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
def save_feedback(input_text, output) -> None:
    """
    Append an input/output pair to a JSON Lines file, holding the scheduler's
    lock to avoid concurrent writes from different users.
    """
    logger.info(f"Saving feedback to {feedback_file}")
    with scheduler.lock:
        try:
            with feedback_file.open("a") as f:
                f.write(json.dumps({
                    "input": input_text,
                    "output": output,
                }))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception as e:
            logger.error(f"Error saving feedback: {str(e)}")
def load_model():
    logger.info("Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="atlasia/XLM-RoBERTa-Morocco",
        token=key,
        device=0,
        torch_dtype=torch.float16,  # half precision to cut GPU memory use
    )
    logger.info("Model loaded successfully!")
    return pipe

pipe = load_model()
@spaces.GPU  # request a GPU for the duration of each call (ZeroGPU)
def predict(text):
    outputs = pipe(text)
    scores = [x["score"] for x in outputs]
    tokens = [x["token_str"] for x in outputs]
    result = {label: float(prob) for label, prob in zip(tokens, scores)}
    # Persist the input/output pair for the collected dataset
    save_feedback(text, result)
    return result
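
# For reference, a fill-mask pipeline returns a list of candidate dicts with
# "score", "token", "token_str", and "sequence" keys (values illustrative):
#   [{"score": 0.42, "token": 1234, "token_str": "بلد",
#     "sequence": "المغرب بلد زوين"}, ...]
# predict() keeps only token_str/score pairs, which is the label-to-confidence
# mapping that gr.Label renders as ranked probabilities.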
# Create Gradio interface
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box (right-to-left for Arabic script)
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True,
            )
            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
            # Output probabilities
            output_labels = gr.Label(
                label="Prediction Results",
                show_label=False,
            )
    # Darija example prompts; in English, roughly: "The capital of <mask> is
    # Rabat", "Morocco <mask> beautiful", and "My name is Maryam, and I live
    # in <mask>, the capital of Palestine"
    gr.Examples(
        examples=[
            "العاصمة د <mask> هي الرباط",
            "المغرب <mask> زوين",
            "انا سميتي مريم، و كنسكن ف<mask> العاصمة دفلسطين",
        ],
        inputs=input_text,
        fn=predict,
        outputs=output_labels,
        cache_examples=True,
    )
    # Button actions
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels,
    )
    clear_btn.click(
        lambda: "",
        outputs=input_text,
    )

# Launch the app with a simple queue (no queue parameters, for compatibility
# with older Gradio versions)
demo.queue()
demo.launch()
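
# To try this outside Spaces (assuming the file is saved as app.py and HF_KEY
# holds a token with access to the model repo):
#   HF_KEY=hf_xxx python app.py
# then open the printed localhost URL. On Spaces, HF_KEY is expected to be
# configured as a Space secret instead.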