Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -3,11 +3,68 @@ import torch
|
|
3 |
from transformers import pipeline
|
4 |
import os
|
5 |
import spaces
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
6 |
|
7 |
#load_dotenv()
|
8 |
key=os.environ["HF_KEY"]
|
9 |
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
def load_model():
|
12 |
print("[INFO] Loading model... This may take a minute on Spaces")
|
13 |
pipe = pipeline(
|
@@ -29,7 +86,13 @@ def predict(text):
|
|
29 |
outputs = pipe(text)
|
30 |
scores= [x["score"] for x in outputs]
|
31 |
tokens= [x["token_str"] for x in outputs]
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
# Create Gradio interface
|
35 |
with gr.Blocks() as demo:
|
|
|
3 |
from transformers import pipeline
|
4 |
import os
|
5 |
import spaces
|
6 |
+
from huggingface_hub import CommitScheduler
|
7 |
+
from pathlib import Path
|
8 |
+
import uuid
|
9 |
+
import json
|
10 |
+
import time
|
11 |
+
from datetime import datetime
|
12 |
+
import logging
|
13 |
+
|
14 |
+
# Logging setup: records go both to a local file (app.log) and to the
# console, so they are visible in the Space's runtime logs as well.
_log_handlers = [
    logging.FileHandler("app.log"),
    logging.StreamHandler(),
]
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
    handlers=_log_handlers,
)

# Named module-wide logger used throughout the app.
logger = logging.getLogger("darija-masked-lm")
|
24 |
|
25 |
#load_dotenv()
|
26 |
key=os.environ["HF_KEY"]
|
27 |
|
28 |
|
29 |
+
# Define the file where to save the data. Each process gets a unique
# file name (uuid4) so concurrent replicas never write to the same file.
# NOTE(review): the file holds JSON Lines despite the .json suffix.
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
# Alias used by save_feedback(); both names refer to the same Path.
feedback_file = submit_file

# Create directory if it doesn't exist (CommitScheduler watches it).
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")

# Background job that periodically commits everything under
# submit_file.parent to the "masked_lm" folder of the dataset repo,
# authenticated with the HF_KEY token read above.
# every=5: commit interval — presumably minutes per huggingface_hub's
# CommitScheduler API; confirm against the installed version's docs.
scheduler = CommitScheduler(
    repo_id="atlasia/atlaset_inference_ds",
    repo_type="dataset",
    folder_path=submit_file.parent,
    path_in_repo="masked_lm",
    every=5,
    token=key
)
logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")
|
46 |
+
|
47 |
+
|
48 |
+
def save_feedback(input, output) -> None:
    """
    Append one input/output record as a JSON line to the feedback file.

    Writes are serialized through the CommitScheduler's lock so that
    concurrent requests from different users cannot interleave lines in
    the shared file. Failures are logged and swallowed on purpose:
    feedback capture is best-effort and must never break prediction.

    Args:
        input: the user's submitted text (name kept for caller
            compatibility although it shadows the builtin).
        output: the model's result mapping for that text.
    """
    # Lazy %-args avoid formatting the message when INFO is disabled.
    logger.info("Saving feedback to %s", feedback_file)

    with scheduler.lock:
        try:
            with feedback_file.open("a") as f:
                f.write(json.dumps({
                    "input": input,
                    "output": output,
                }))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception:
            # logger.exception keeps the traceback, unlike the previous
            # logger.error(f"... {str(e)}") which dropped it.
            logger.exception("Error saving feedback")
|
66 |
+
|
67 |
+
|
68 |
def load_model():
|
69 |
print("[INFO] Loading model... This may take a minute on Spaces")
|
70 |
pipe = pipeline(
|
|
|
86 |
outputs = pipe(text)
|
87 |
scores= [x["score"] for x in outputs]
|
88 |
tokens= [x["token_str"] for x in outputs]
|
89 |
+
result={label: float(prob) for label, prob in zip(tokens, scores)}
|
90 |
+
# Save feedback with additional metadata
|
91 |
+
save_feedback(
|
92 |
+
text,
|
93 |
+
result
|
94 |
+
)
|
95 |
+
return result
|
96 |
|
97 |
# Create Gradio interface
|
98 |
with gr.Blocks() as demo:
|