# Source: HF Space by abdeljalilELmajjodi — "Update app.py" (commit b1361ec, verified)
import gradio as gr
import torch
from transformers import pipeline
import os
import spaces
from huggingface_hub import CommitScheduler
from pathlib import Path
import uuid
import json
import time
from datetime import datetime
import logging
# Configure logging: write to both app.log and stdout so Space logs and the
# local file capture the same records.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler("app.log"),
logging.StreamHandler()
]
)
logger = logging.getLogger("darija-masked-lm")
#load_dotenv()
# HF token from the Space secrets; raises KeyError at startup if unset,
# which is intentional — the app cannot work without it.
key=os.environ["HF_KEY"]
# Define the file where to save the data. A per-process UUID filename avoids
# clobbering data written by a previous (restarted) instance of the Space.
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
feedback_file = submit_file
# Create directory if it doesn't exist
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")
# Background scheduler that commits the whole user_submit/ folder to the
# dataset repo every 5 minutes under the "masked_lm" path.
scheduler = CommitScheduler(
repo_id="atlasia/atlaset_inference_ds",
repo_type="dataset",
folder_path=submit_file.parent,
path_in_repo="masked_lm",
every=5,
token=key
)
logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
def save_feedback(input, output) -> None:
    """
    Append an input/output pair as one JSON line to `feedback_file`.

    Holds the CommitScheduler lock so a background commit never uploads a
    half-written line and concurrent requests don't interleave writes.

    Args:
        input: Raw user text that was sent to the model.
        output: Prediction mapping (token string -> probability) to persist.
    """
    logger.info(f"Saving feedback to {feedback_file}")
    with scheduler.lock:
        try:
            # encoding + ensure_ascii=False keep Arabic/Darija text readable
            # in the dataset instead of \uXXXX escapes.
            with feedback_file.open("a", encoding="utf-8") as f:
                f.write(json.dumps({
                    "input": input,
                    "output": output,
                }, ensure_ascii=False))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception as e:
            # logger.exception records the full traceback, not just the message;
            # storage errors must not crash the inference request.
            logger.exception(f"Error saving feedback: {str(e)}")
def load_model():
    """
    Load the Darija fill-mask pipeline on GPU in half precision.

    Returns:
        A transformers fill-mask pipeline for atlasia/XLM-RoBERTa-Morocco.
    """
    # Use the configured logger (consistent with the rest of the file)
    # instead of bare print(), so startup progress lands in app.log too.
    logger.info("Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="atlasia/XLM-RoBERTa-Morocco",
        token=key,  # private/gated model access via the Space secret
        device=0,  # first GPU
        torch_dtype=torch.float16,  # half precision halves GPU memory use
    )
    logger.info("Model loaded successfully!")
    return pipe
# Load the model once at import time so every request reuses the same pipeline.
# Routed through the configured logger (not print) for consistency with the file.
logger.info("load model ...")
pipe = load_model()
logger.info("model loaded")
@spaces.GPU
def predict(text):
    """
    Run fill-mask inference on `text` and return candidate probabilities.

    Args:
        text: Input string expected to contain a <mask> token.

    Returns:
        Dict mapping each candidate token string to its probability (float).
    """
    predictions = pipe(text)
    result = {item["token_str"]: float(item["score"]) for item in predictions}
    # Persist the input/output pair so it lands in the feedback dataset.
    save_feedback(text, result)
    return result
# Create Gradio interface: a single column with an RTL input box (Arabic
# script), Clear/Submit buttons, and a Label component for the token
# probabilities returned by predict().
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box; rtl=True renders right-to-left for Darija text.
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True
            )
            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
    # Output probabilities (gr.Label renders a dict as ranked bars)
    output_labels = gr.Label(
        label="Prediction Results",
        show_label=False
    )
    # Examples section; cache_examples=True precomputes predictions at startup.
    # NOTE(review): cached example runs also call save_feedback — confirm that
    # recording example outputs into the dataset is intended.
    gr.Examples(
        examples=["العاصمة د <mask> هي الرباط","المغرب <mask> زوين","انا سميتي مريم، و كنسكن ف<mask> العاصمة دفلسطين"],
        inputs=input_text,
        fn=predict,
        outputs=output_labels,
        cache_examples=True
    )
    # Button actions: Submit runs inference, Clear empties the input box.
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels
    )
    clear_btn.click(
        lambda: "",
        outputs=input_text
    )
# Launch the app with simple queue
demo.queue() # No parameters for older Gradio versions
demo.launch()