Gleb Vinarskis committed on
Commit
d978013
·
1 Parent(s): c462b34

trying different approach

Browse files
Files changed (2) hide show
  1. config.json +17 -23
  2. impresso_langident_wrapper.py +10 -10
config.json CHANGED
@@ -1,31 +1,25 @@
1
  {
2
- "_name_or_path": "Maslionok/pipeline1",
3
  "architectures": [
4
- "BertForTokenClassification"
5
  ],
6
- "custom_pipelines": {
7
- "language-detection": {
8
- "impl": "impresso_langident_wrapper.Pipeline_One",
9
- "pt": ["AutoModel"],
10
- "tf": []
11
- }
12
- },
13
  "id2label": {
14
- "0": "English",
15
- "1": "German",
16
- "2": "French"
17
  },
18
  "label2id": {
19
- "English": 0,
20
- "German": 1,
21
- "French": 2
22
  },
23
- "model_type": "bert",
24
- "vocab_size": 2000000,
25
- "embedding_dim": 300,
26
- "hash_count": 4,
27
- "minn": 3,
28
- "maxn": 6,
29
- "bucket": 2000000,
30
- "num_labels": 40
31
  }
 
1
  {
2
+ "_name_or_path": "Maslionok/pipeline1/LID-40-3-2000000-1-4.bin",
3
  "architectures": [
4
+ "Pipeline_One"
5
  ],
6
+ "model_type": "floret",
7
+ "num_labels": 40,
 
 
 
 
 
8
  "id2label": {
9
+ "0": "English",
10
+ "1": "German",
11
+ "2": "French"
12
  },
13
  "label2id": {
14
+ "English": 0,
15
+ "German": 1,
16
+ "French": 2
17
  },
18
+ "custom_pipelines": {
19
+ "language-detection": {
20
+ "impl": "Pipeline_One",
21
+ "pt": [],
22
+ "tf": []
23
+ }
24
+ }
 
25
  }
impresso_langident_wrapper.py CHANGED
@@ -1,29 +1,29 @@
1
  from transformers import Pipeline
2
  from transformers.pipelines import PIPELINE_REGISTRY
3
-
4
 
5
 
6
  class Pipeline_One(Pipeline):
7
  def _sanitize_parameters(self, **kwargs):
8
- # Add any additional parameter handling if necessary
9
  return kwargs, {}, {}
10
 
11
  def preprocess(self, text, **kwargs):
12
  return text
13
 
14
  def _forward(self, inputs):
15
- model_output = self.model.predict(inputs, k=1)
 
 
 
 
16
  return model_output
17
 
18
  def postprocess(self, outputs, **kwargs):
19
- return outputs
20
-
21
 
22
- from transformers import AutoModelForSequenceClassification, TFAutoModelForSequenceClassification
23
 
 
24
  PIPELINE_REGISTRY.register_pipeline(
25
  "language-detection",
26
- pipeline_class=Pipeline_One,
27
- pt_model=AutoModelForSequenceClassification,
28
- tf_model=TFAutoModelForSequenceClassification,
29
- )
 
1
  from transformers import Pipeline
2
  from transformers.pipelines import PIPELINE_REGISTRY
3
+ import floret # Ensure floret is imported
4
 
5
 
6
  class Pipeline_One(Pipeline):
7
  def _sanitize_parameters(self, **kwargs):
 
8
  return kwargs, {}, {}
9
 
10
  def preprocess(self, text, **kwargs):
11
  return text
12
 
13
  def _forward(self, inputs):
14
+ # Lazy load the model when first used
15
+ if not hasattr(self, "model"):
16
+ self.model = floret.load_model(self.model.config["Maslionok/pipeline1"]) # Load model dynamically
17
+
18
+ model_output = self.model.predict([inputs], k=1)
19
  return model_output
20
 
21
  def postprocess(self, outputs, **kwargs):
22
+ return outputs[0] # Return top prediction
 
23
 
 
24
 
25
+ # Register the pipeline
26
  PIPELINE_REGISTRY.register_pipeline(
27
  "language-detection",
28
+ pipeline_class=Pipeline_One
29
+ )