abdeljalilELmajjodi committed on
Commit b1361ec · verified · 1 Parent(s): f69e84b

Update app.py

Files changed (1):
  1. app.py +64 -1
app.py CHANGED
@@ -3,11 +3,68 @@ import torch
 from transformers import pipeline
 import os
 import spaces
+from huggingface_hub import CommitScheduler
+from pathlib import Path
+import uuid
+import json
+import time
+from datetime import datetime
+import logging
+
+# Configure logging
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+    handlers=[
+        logging.FileHandler("app.log"),
+        logging.StreamHandler()
+    ]
+)
+logger = logging.getLogger("darija-masked-lm")

 #load_dotenv()
 key=os.environ["HF_KEY"]


+# Define the file where to save the data
+submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
+feedback_file = submit_file
+
+# Create directory if it doesn't exist
+submit_file.parent.mkdir(exist_ok=True, parents=True)
+logger.info(f"Created feedback file: {feedback_file}")
+
+scheduler = CommitScheduler(
+    repo_id="atlasia/atlaset_inference_ds",
+    repo_type="dataset",
+    folder_path=submit_file.parent,
+    path_in_repo="masked_lm",
+    every=5,
+    token=key
+)
+logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
+
+
+def save_feedback(input, output) -> None:
+    """
+    Append input/outputs and parameters to a JSON Lines file using a thread lock
+    to avoid concurrent writes from different users.
+    """
+    logger.info(f"Saving feedback to {feedback_file}")
+
+    with scheduler.lock:
+        try:
+            with feedback_file.open("a") as f:
+                f.write(json.dumps({
+                    "input": input,
+                    "output": output,
+                }))
+                f.write("\n")
+            logger.info("Feedback saved successfully")
+        except Exception as e:
+            logger.error(f"Error saving feedback: {str(e)}")
+
+
 def load_model():
     print("[INFO] Loading model... This may take a minute on Spaces")
     pipe = pipeline(
@@ -29,7 +86,13 @@ def predict(text):
     outputs = pipe(text)
     scores= [x["score"] for x in outputs]
     tokens= [x["token_str"] for x in outputs]
-    return {label: float(prob) for label, prob in zip(tokens, scores)}
+    result={label: float(prob) for label, prob in zip(tokens, scores)}
+    # Save feedback with additional metadata
+    save_feedback(
+        text,
+        result
+    )
+    return result

 # Create Gradio interface
 with gr.Blocks() as demo:
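
For reference, a minimal sketch of how the JSON Lines records written by save_feedback could be read back once CommitScheduler has pushed them to the dataset repo. Only the repo id atlasia/atlaset_inference_ds, the masked_lm folder, and the {"input": ..., "output": ...} record shape come from the diff above; the listing and download code below is an illustrative assumption, not part of this commit.

# Sketch only (not part of the commit): load the feedback records that
# CommitScheduler periodically commits to atlasia/atlaset_inference_ds.
import json
import os

from huggingface_hub import HfApi, hf_hub_download

token = os.environ["HF_KEY"]            # same token the Space uses (assumption)
repo_id = "atlasia/atlaset_inference_ds"

api = HfApi(token=token)
# One data_<uuid>.json file is written per Space session under masked_lm/.
feedback_files = [
    f for f in api.list_repo_files(repo_id, repo_type="dataset")
    if f.startswith("masked_lm/") and f.endswith(".json")
]

records = []
for remote_path in feedback_files:
    local_path = hf_hub_download(
        repo_id=repo_id,
        repo_type="dataset",
        filename=remote_path,
        token=token,
    )
    with open(local_path) as fh:
        # Each line is one {"input": ..., "output": ...} JSON object.
        records.extend(json.loads(line) for line in fh if line.strip())

print(f"Loaded {len(records)} feedback records")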