# Source: HF Space by abdeljalilELmajjodi — "Update app.py" (commit b1361ec, verified)
import gradio as gr
import torch
from transformers import pipeline
import os
import spaces
from huggingface_hub import CommitScheduler
from pathlib import Path
import uuid
import json
import time
from datetime import datetime
import logging
# Configure logging: write to both app.log and stdout so Space logs and the
# local file capture the same records.
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[
logging.FileHandler("app.log"),
logging.StreamHandler()
]
)
logger = logging.getLogger("darija-masked-lm")
#load_dotenv()
# HF token from the Space secrets; raises KeyError at startup if unset,
# which is intentional — the app cannot work without it.
key=os.environ["HF_KEY"]
# Define the file where to save the data. A per-process UUID filename avoids
# clobbering data written by a previous (restarted) instance of the Space.
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
feedback_file = submit_file
# Create directory if it doesn't exist
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")
# Background scheduler that commits the whole user_submit/ folder to the
# dataset repo every 5 minutes under the "masked_lm" path.
scheduler = CommitScheduler(
repo_id="atlasia/atlaset_inference_ds",
repo_type="dataset",
folder_path=submit_file.parent,
path_in_repo="masked_lm",
every=5,
token=key
)
logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
def save_feedback(input, output) -> None:
    """
    Append an input/output pair as one JSON line to `feedback_file`.

    Holds the CommitScheduler lock so a background commit never uploads a
    half-written line and concurrent requests don't interleave writes.

    Args:
        input: Raw user text that was sent to the model.
        output: Prediction mapping (token string -> probability) to persist.
    """
    logger.info(f"Saving feedback to {feedback_file}")
    with scheduler.lock:
        try:
            # encoding + ensure_ascii=False keep Arabic/Darija text readable
            # in the dataset instead of \uXXXX escapes.
            with feedback_file.open("a", encoding="utf-8") as f:
                f.write(json.dumps({
                    "input": input,
                    "output": output,
                }, ensure_ascii=False))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception as e:
            # logger.exception records the full traceback, not just the message;
            # storage errors must not crash the inference request.
            logger.exception(f"Error saving feedback: {str(e)}")
def load_model():
    """
    Load the Darija fill-mask pipeline on GPU in half precision.

    Returns:
        A transformers fill-mask pipeline for atlasia/XLM-RoBERTa-Morocco.
    """
    # Use the configured logger (consistent with the rest of the file)
    # instead of bare print(), so startup progress lands in app.log too.
    logger.info("Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="atlasia/XLM-RoBERTa-Morocco",
        token=key,  # private/gated model access via the Space secret
        device=0,  # first GPU
        torch_dtype=torch.float16,  # half precision halves GPU memory use
    )
    logger.info("Model loaded successfully!")
    return pipe
# Load the model once at import time so every request reuses the same pipeline.
# Routed through the configured logger (not print) for consistency with the file.
logger.info("load model ...")
pipe = load_model()
logger.info("model loaded")
@spaces.GPU
def predict(text):
    """
    Run fill-mask inference on `text` and return candidate probabilities.

    Args:
        text: Input string expected to contain a <mask> token.

    Returns:
        Dict mapping each candidate token string to its probability (float).
    """
    predictions = pipe(text)
    result = {item["token_str"]: float(item["score"]) for item in predictions}
    # Persist the input/output pair so it lands in the feedback dataset.
    save_feedback(text, result)
    return result
# Create Gradio interface: a single column with an RTL input box (Arabic
# script), Clear/Submit buttons, and a Label component for the token
# probabilities returned by predict().
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box; rtl=True renders right-to-left for Darija text.
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True
            )
            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
    # Output probabilities (gr.Label renders a dict as ranked bars)
    output_labels = gr.Label(
        label="Prediction Results",
        show_label=False
    )
    # Examples section; cache_examples=True precomputes predictions at startup.
    # NOTE(review): cached example runs also call save_feedback — confirm that
    # recording example outputs into the dataset is intended.
    gr.Examples(
        examples=["العاصمة د <mask> هي الرباط","المغرب <mask> زوين","انا سميتي مريم، و كنسكن ف<mask> العاصمة دفلسطين"],
        inputs=input_text,
        fn=predict,
        outputs=output_labels,
        cache_examples=True
    )
    # Button actions: Submit runs inference, Clear empties the input box.
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels
    )
    clear_btn.click(
        lambda: "",
        outputs=input_text
    )
# Launch the app with simple queue
demo.queue() # No parameters for older Gradio versions
demo.launch()