File size: 3,908 Bytes
a0188fc
 
 
 
fd96f46
b1361ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0188fc
 
a81b12c
a0188fc
 
b1361ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0188fc
8113c0b
 
 
f69e84b
8113c0b
 
 
 
 
a0188fc
 
 
 
 
 
fd96f46
a0188fc
fd96f46
 
 
b1361ec
 
 
 
 
 
 
a0188fc
 
 
 
 
 
 
 
3ad7345
 
a0188fc
 
 
 
 
 
 
 
 
7849f7f
849d516
 
 
aad18c2
849d516
 
 
aad18c2
 
849d516
a0188fc
 
 
 
 
 
aad18c2
a0188fc
 
 
 
 
 
 
aad18c2
 
849d516
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import gradio as gr
import torch
from transformers import pipeline
import os
import spaces
from huggingface_hub import CommitScheduler
from pathlib import Path
import uuid
import json
import time
from datetime import datetime
import logging

# Configure logging: INFO and above goes both to app.log (persistent on the
# Space) and to stdout (visible in the Spaces runtime logs).
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("app.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("darija-masked-lm")

#load_dotenv()
# HF access token; raises KeyError at startup if the HF_KEY secret is missing,
# which fails fast rather than erroring later on the first gated call.
key=os.environ["HF_KEY"]


# Define the file where to save the data.
# A per-process UUID filename avoids collisions between app restarts /
# replicas writing into the same synced folder.
submit_file = Path("user_submit/") / f"data_{uuid.uuid4()}.json"
feedback_file = submit_file

# Create directory if it doesn't exist
submit_file.parent.mkdir(exist_ok=True, parents=True)
logger.info(f"Created feedback file: {feedback_file}")

# Background scheduler that commits the contents of user_submit/ to the
# dataset repo every 5 minutes, under the masked_lm/ path in the repo.
scheduler = CommitScheduler(
    repo_id="atlasia/atlaset_inference_ds",
    repo_type="dataset",
    folder_path=submit_file.parent,
    path_in_repo="masked_lm",
    every=5,
    token=key
)
logger.info(f"Initialized CommitScheduler for repo: atlasia/atlaset_inference_ds")


def save_feedback(input, output) -> None:
    """
    Append one input/output record as a JSON line to the feedback file.

    Holds the CommitScheduler's lock so concurrent user requests cannot
    interleave writes, and so a write never races the background commit.

    Args:
        input: The raw text the user submitted (may contain a <mask>).
        output: The prediction result (token -> probability mapping).
    """
    logger.info(f"Saving feedback to {feedback_file}")

    with scheduler.lock:
        try:
            # Explicit UTF-8 so behavior does not depend on the platform's
            # default locale encoding.
            with feedback_file.open("a", encoding="utf-8") as f:
                # ensure_ascii=False keeps the Darija/Arabic text readable in
                # the dataset instead of \uXXXX escape sequences.
                f.write(json.dumps({
                    "input": input,
                    "output": output,
                }, ensure_ascii=False))
                f.write("\n")
            logger.info("Feedback saved successfully")
        except Exception as e:
            # Best-effort persistence: a logging failure must never break
            # the user-facing prediction flow, so log and continue.
            logger.error(f"Error saving feedback: {str(e)}")


def load_model():
    """
    Load the fill-mask pipeline for the Moroccan Darija XLM-RoBERTa model.

    Runs on GPU (device=0) in float16 to cut VRAM use and speed up
    inference on Spaces hardware.

    Returns:
        A transformers fill-mask pipeline ready for inference.
    """
    # Use the module-level logger (not print) so model-load messages land in
    # app.log alongside the rest of the app's logs.
    logger.info("Loading model... This may take a minute on Spaces")
    pipe = pipeline(
        task="fill-mask",
        model="atlasia/XLM-RoBERTa-Morocco",
        token=key,
        device=0,
        torch_dtype=torch.float16  # Use half precision
    )
    logger.info("Model loaded successfully!")
    return pipe

# Load the model eagerly at import time so the first user request does not
# pay the (potentially long) download/initialization cost.
print("[INFO] load model ...")
pipe=load_model()
print("[INFO] model loaded")

@spaces.GPU
def predict(text):
    """
    Run fill-mask prediction on *text* and persist the result as feedback.

    Args:
        text: Input sentence, expected to contain a <mask> token.

    Returns:
        Mapping of predicted token strings to their probabilities.
    """
    predictions = pipe(text)
    result = {item["token_str"]: float(item["score"]) for item in predictions}
    # Record the input/output pair for the inference dataset.
    save_feedback(
        text,
        result
    )
    return result

# Create Gradio interface: a single column with an RTL input box (Arabic
# script), clear/submit buttons, label output, and cached examples.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            # Input text box; rtl=True renders right-to-left for Arabic/Darija.
            input_text = gr.Textbox(
                label="Input",
                placeholder="Enter text here...",
                rtl=True
            )
            
            # Button row
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")
            
            # Output probabilities rendered as a label/bar chart component.
            output_labels = gr.Label(
                label="Prediction Results",
                show_label=False
            )
            
            # Examples section with basic configuration.
            # cache_examples=True runs predict on each example at startup and
            # serves the stored results (so example clicks also hit
            # save_feedback once, at caching time).
            gr.Examples(
                examples=["العاصمة د <mask> هي الرباط","المغرب <mask> زوين","انا سميتي مريم، و كنسكن ف<mask> العاصمة دفلسطين"],
                inputs=input_text,
                fn=predict,
                outputs=output_labels,
                cache_examples=True
            )

    # Button actions
    submit_btn.click(
        predict,
        inputs=input_text,
        outputs=output_labels
    )
    
    # Clear simply resets the input box to an empty string.
    clear_btn.click(
        lambda: "",
        outputs=input_text
    )

# Launch the app with simple queue
demo.queue()  # No parameters for older Gradio versions
demo.launch()