Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,68 @@ import torch
|
|
3 |
from transformers import pipeline
|
4 |
import os
|
5 |
import spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
#load_dotenv()
|
8 |
key=os.environ["HF_KEY"]
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def load_model():
|
12 |
print("[INFO] Loading model... This may take a minute on Spaces")
|
13 |
pipe = pipeline(
|
@@ -29,7 +86,13 @@ def predict(text):
|
|
29 |
outputs = pipe(text)
|
30 |
scores= [x["score"] for x in outputs]
|
31 |
tokens= [x["token_str"] for x in outputs]
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# Create Gradio interface
|
35 |
with gr.Blocks() as demo:
|
|
|
3 |
from transformers import pipeline
|
4 |
import os
|
5 |
import spaces
|
6 |
+
from huggingface_hub import CommitScheduler
|
7 |
+
from pathlib import Path
|
8 |
+
import uuid
|
9 |
+
import json
|
10 |
+
import time
|
11 |
+
from datetime import datetime
|
12 |
+
import logging
|
13 |
+
|
14 |
+
# Logging setup: records go both to a local file (app.log) and to the
# console, so they are visible in the Space's runtime logs as well.
_log_handlers = [
    logging.FileHandler("app.log"),
    logging.StreamHandler(),
]
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=_log_handlers,
)

# Named module-wide logger used throughout the app.
logger = logging.getLogger("darija-masked-lm")
|
24 |
|
25 |
#load_dotenv()
|
26 |
key=os.environ["HF_KEY"]
|
27 |
|
28 |
|
29 |
+
# Define the file where to save the data. Each process gets a unique
# file name (uuid4) so concurrent replicas never write to the same file.
# NOTE(review): the file holds JSON Lines despite the .json suffix.
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
# Alias used by save_feedback(); both names refer to the same Path.
feedback_file = submit_file

# Create directory if it doesn't exist (CommitScheduler watches it).
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")

# Background job that periodically commits everything under
# submit_file.parent to the "masked_lm" folder of the dataset repo,
# authenticated with the HF_KEY token read above.
# every=5: commit interval — presumably minutes per huggingface_hub's
# CommitScheduler API; confirm against the installed version's docs.
scheduler = CommitScheduler(
    repo_id="atlasia/atlaset_inference_ds",
    repo_type="dataset",
    folder_path=submit_file.parent,
    path_in_repo="masked_lm",
    every=5,
    token=key
)
logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
|
46 |
+
|
47 |
+
|
48 |
+
def save_feedback(input, output) -> None:
    """
    Append one input/output record as a JSON line to the feedback file.

    Writes are serialized through the CommitScheduler's lock so that
    concurrent requests from different users cannot interleave lines in
    the shared file. Failures are logged and swallowed on purpose:
    feedback capture is best-effort and must never break prediction.

    Args:
        input: the user's submitted text (name kept for caller
            compatibility although it shadows the builtin).
        output: the model's result mapping for that text.
    """
    # Lazy %-args avoid formatting the message when INFO is disabled.
    logger.info("Saving feedback to %s", feedback_file)

    with scheduler.lock:
        try:
            with feedback_file.open("a") as f:
                f.write(json.dumps({
                    "input": input,
                    "output": output,
                }))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception:
            # logger.exception keeps the traceback, unlike the previous
            # logger.error(f"... {str(e)}") which dropped it.
            logger.exception("Error saving feedback")
|
66 |
+
|
67 |
+
|
68 |
def load_model():
|
69 |
print("[INFO] Loading model... This may take a minute on Spaces")
|
70 |
pipe = pipeline(
|
|
|
86 |
outputs = pipe(text)
|
87 |
scores= [x["score"] for x in outputs]
|
88 |
tokens= [x["token_str"] for x in outputs]
|
89 |
+
result={label: float(prob) for label, prob in zip(tokens, scores)}
|
90 |
+
# Save feedback with additional metadata
|
91 |
+
save_feedback(
|
92 |
+
text,
|
93 |
+
result
|
94 |
+
)
|
95 |
+
return result
|
96 |
|
97 |
# Create Gradio interface
|
98 |
with gr.Blocks() as demo:
|