Gleb Vinarskis
committed on
Commit
·
d978013
1
Parent(s):
c462b34
trying different approach
Browse files
- config.json +17 -23
- impresso_langident_wrapper.py +10 -10
config.json
CHANGED
@@ -1,31 +1,25 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "Maslionok/pipeline1",
|
3 |
"architectures": [
|
4 |
-
"
|
5 |
],
|
6 |
-
"
|
7 |
-
|
8 |
-
"impl": "impresso_langident_wrapper.Pipeline_One",
|
9 |
-
"pt": ["AutoModel"],
|
10 |
-
"tf": []
|
11 |
-
}
|
12 |
-
},
|
13 |
"id2label": {
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
},
|
18 |
"label2id": {
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
},
|
23 |
-
"
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
"num_labels": 40
|
31 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "Maslionok/pipeline1/LID-40-3-2000000-1-4.bin",
|
3 |
"architectures": [
|
4 |
+
"Pipeline_One"
|
5 |
],
|
6 |
+
"model_type": "floret",
|
7 |
+
"num_labels": 40,
|
|
|
|
|
|
|
|
|
|
|
8 |
"id2label": {
|
9 |
+
"0": "English",
|
10 |
+
"1": "German",
|
11 |
+
"2": "French"
|
12 |
},
|
13 |
"label2id": {
|
14 |
+
"English": 0,
|
15 |
+
"German": 1,
|
16 |
+
"French": 2
|
17 |
},
|
18 |
+
"custom_pipelines": {
|
19 |
+
"language-detection": {
|
20 |
+
"impl": "Pipeline_One",
|
21 |
+
"pt": [],
|
22 |
+
"tf": []
|
23 |
+
}
|
24 |
+
}
|
|
|
25 |
}
|
impresso_langident_wrapper.py
CHANGED
@@ -1,29 +1,29 @@
|
|
1 |
from transformers import Pipeline
|
2 |
from transformers.pipelines import PIPELINE_REGISTRY
|
3 |
-
|
4 |
|
5 |
|
6 |
class Pipeline_One(Pipeline):
|
7 |
def _sanitize_parameters(self, **kwargs):
|
8 |
-
# Add any additional parameter handling if necessary
|
9 |
return kwargs, {}, {}
|
10 |
|
11 |
def preprocess(self, text, **kwargs):
|
12 |
return text
|
13 |
|
14 |
def _forward(self, inputs):
|
15 |
-
|
|
|
|
|
|
|
|
|
16 |
return model_output
|
17 |
|
18 |
def postprocess(self, outputs, **kwargs):
|
19 |
-
return outputs
|
20 |
-
|
21 |
|
22 |
-
from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification
|
23 |
|
|
|
24 |
PIPELINE_REGISTRY.register_pipeline(
|
25 |
"language-detection",
|
26 |
-
pipeline_class=Pipeline_One
|
27 |
-
|
28 |
-
tf_model=TFAutoModelForSequenceClassification,
|
29 |
-
)
|
|
|
1 |
from transformers import Pipeline
|
2 |
from transformers.pipelines import PIPELINE_REGISTRY
|
3 |
+
import floret # Ensure floret is imported
|
4 |
|
5 |
|
6 |
class Pipeline_One(Pipeline):
|
7 |
def _sanitize_parameters(self, **kwargs):
|
|
|
8 |
return kwargs, {}, {}
|
9 |
|
10 |
def preprocess(self, text, **kwargs):
|
11 |
return text
|
12 |
|
13 |
def _forward(self, inputs):
|
14 |
+
# Lazy load the model when first used
|
15 |
+
if not hasattr(self, "model"):
|
16 |
+
self.model = floret.load_model(self.model.config["Maslionok/pipeline1"]) # Load model dynamically
|
17 |
+
|
18 |
+
model_output = self.model.predict([inputs], k=1)
|
19 |
return model_output
|
20 |
|
21 |
def postprocess(self, outputs, **kwargs):
|
22 |
+
return outputs[0] # Return top prediction
|
|
|
23 |
|
|
|
24 |
|
25 |
+
# Register the pipeline
|
26 |
PIPELINE_REGISTRY.register_pipeline(
|
27 |
"language-detection",
|
28 |
+
pipeline_class=Pipeline_One
|
29 |
+
)
|
|
|
|