Add new SentenceTransformer model
Browse files- 1_Pooling/config.json +10 -0
- README.md +567 -0
- config.json +47 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +945 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 768,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,567 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
language:
|
3 |
+
- en
|
4 |
+
license: apache-2.0
|
5 |
+
tags:
|
6 |
+
- sentence-transformers
|
7 |
+
- sentence-similarity
|
8 |
+
- feature-extraction
|
9 |
+
- generated_from_trainer
|
10 |
+
- dataset_size:2828
|
11 |
+
- loss:MultipleNegativesRankingLoss
|
12 |
+
base_model: nomic-ai/modernbert-embed-base
|
13 |
+
widget:
|
14 |
+
- source_sentence: The first respects the interest in which the litigation is being
|
15 |
+
prosecuted, and the second is the failure of the plaintiff to either plead or
|
16 |
+
prove a cause of action on his behalf as a stockholder. If this litigation had
|
17 |
+
been honestly instituted by a stockholder for the protection of his and other
|
18 |
+
stockholders ’ rights, and was not so evidently a suit instigated by a rival company
|
19 |
+
for its own interests, we should strive to be astute to discover some remedy for
|
20 |
+
a very evident wrong. The far reaching and flexible nature of equitable powers
|
21 |
+
might, with proper proof and under other circumstances, enable us to do justice
|
22 |
+
as between the stockholders of the Grey Creek Company and Chappell, its officer
|
23 |
+
and director. But we have no inclination to struggle for this result, because
|
24 |
+
it is a well settled principle that whenever it is made to appear that the suit
|
25 |
+
was not begun in good faith by a shareholder for the protection of his rights,
|
26 |
+
but was in reality originated and prosecuted by another corporation for its own
|
27 |
+
benefit, the court will consider what led the plaintiff to institute his suit,
|
28 |
+
and, finding some other reason than a desire to protect stockholders ’ rights,
|
29 |
+
will refuse to entertain the bill. Forrest v. Manchester, etc., R ' way Co., 4
|
30 |
+
De G., F. & J. 19 ( 65 Eng. Chan., 125 ) ; Filder v. London, etc., R ' way Co.,
|
31 |
+
1 H. & M. 489 ; Belmont v. Erie R ' way Co. et al., 52 Barb. 637 ; Waterbury v.
|
32 |
+
The Merchants ’ Union Express Co., 50 Barb. 157 ; Camblos v. The P. & R. R. R.
|
33 |
+
Co., 4 Brewster, 563. Naturally, the cases respecting this proposition are limited,
|
34 |
+
since the question could not often arise. It seldom happens that shareholders,
|
35 |
+
otherwise than for the protection of their own interests, come into courts of
|
36 |
+
equity to seek redress for wrongs done the corporation of which they are * 331members.
|
37 |
+
But wherever it is apparent that this has been done, the courts have never hesitated
|
38 |
+
to send the plaintiff out of court and refuse him relief.
|
39 |
+
sentences:
|
40 |
+
- When can a shareholder's lawsuit be dismissed for lack of good faith?
|
41 |
+
- What are the requirements for filing a patent application in the United States?
|
42 |
+
- How are disputes over partnership assets and liabilities resolved in court?
|
43 |
+
- source_sentence: It must be conceded that defendant ’ s property within the State
|
44 |
+
is negligible. * 766The salaries of Titus and the other salesman are paid by the
|
45 |
+
defendant ’ s home office. Titus and his associate salesman are employed on a
|
46 |
+
salary basis and devote all their time to the business of the defendant. Titus
|
47 |
+
employs a young woman stenographer and pays her out of the aforementioned “ H.
|
48 |
+
B. Titus, Special ” account. Defendant has no other employees in New York. Titus
|
49 |
+
and his associate are constantly and systematically engaged within the State of
|
50 |
+
New York in soliciting business for the defendant. Their activities result in
|
51 |
+
the continuous shipment by the defendant of its product into and outside of the
|
52 |
+
State of New York. It was testified by Titus that the shipments into this State
|
53 |
+
attain a monthly average of approximately $ 14, 000. Shipments are made in every
|
54 |
+
case from factories without the State “ f. o. b. plant. ” Orders received by Titus
|
55 |
+
from new customers are transmitted to the home office in Cleveland and are there
|
56 |
+
accepted or rejected, presumably after due investigation of the customer ’ s credit
|
57 |
+
standing. In the case of orders received from approved accounts, that is to say,
|
58 |
+
from customers who have previously done business with the defendant and whose
|
59 |
+
credit standing has been found satisfactory by the defendant ’ s home office,
|
60 |
+
and who have thus established a permanent relationship with defendant ’ s New
|
61 |
+
York office, Titus promptly transmits the order to the factory, by means of a
|
62 |
+
teletype machine which the defendant caused to be installed in the 50 Church street
|
63 |
+
office for the use of Titus. This practice is always followed in the case of a
|
64 |
+
rush order from an approved account if the amount of the order is not unusually
|
65 |
+
large ; and the testimony affords some reason to suppose that it is followed in
|
66 |
+
the case of every normal - sized order from such an account. As a general rule,
|
67 |
+
prices are established by the Cleveland office, but Titus was sometimes authorized
|
68 |
+
to quote varying prices in order to meet competition. Orders received on the basis
|
69 |
+
of prices thus quoted by Titus required the approval of the home office, but were,
|
70 |
+
as a matter of fact, in no instance rejected. Defendant ’ s customers in New York
|
71 |
+
make payment directly to the Cleveland office, but when instructed to do so, Titus
|
72 |
+
undertakes the collection of delinquent accounts.
|
73 |
+
sentences:
|
74 |
+
- What factors are considered by courts in determining the best interest of a child
|
75 |
+
in custody cases?
|
76 |
+
- What are the tax implications of freelancing as a sole proprietor?
|
77 |
+
- What constitutes sufficient business activity for a company to be subject to jurisdiction
|
78 |
+
in a state?
|
79 |
+
- source_sentence: The evil is still just as great as it was formerly, if a party
|
80 |
+
can have only legal or equitable relief in the same action. In such case, if he
|
81 |
+
commences his action asking for equitable relief, as for instance a specific performance,
|
82 |
+
and it turns out that he is not entitled to it, but only to legal relief, by way
|
83 |
+
of damages, he might perhaps, if such strictness is to govern, be put to a new
|
84 |
+
action to obtain redress. This certainly ought not to be ; and such a strictness
|
85 |
+
is hostile to the whole spirit of the change that has been made. In trying such
|
86 |
+
a cause at the circuit, I should most certainly allow whatever amendment in the
|
87 |
+
pleadings was necessary to give the party redress. If the plaintiff had asked
|
88 |
+
for equitable relief, and it turned out that he was entitled to legal relief only,
|
89 |
+
I should permit him to take it in that form. And if he had asked for legal relief
|
90 |
+
only, when he was entitled to both legal and equitable relief, I should allow
|
91 |
+
the proper amendment to administer complete justice in the case. The power to
|
92 |
+
amend, authorized by the Code, is ample for such purpose. Now the last case of
|
93 |
+
amendment I have mentioned as permissible at the circuit, is precisely what is
|
94 |
+
claimed in this case, with this difference only, that it is claimed to be made
|
95 |
+
here, before issue joined, and when, of course, the defendant has abundant time
|
96 |
+
and opportunity to prepare to meet the claim at the circuit. I see no objection
|
97 |
+
in this case to uniting claims for both legal and equitable relief in the same
|
98 |
+
action. Both depend on the same transaction and both are necessary to indemnify
|
99 |
+
the plaintiff for past, and to protect him against future injury. I think the
|
100 |
+
proper course, under our present system of practice, is to give the party whatever
|
101 |
+
relief is applicable to the facts put * 271in issue in the pleadings and established
|
102 |
+
on the trial, whether such relief be legal or equitable, or both. And I see no
|
103 |
+
reason against uniting in one action claims for both legal and equitable relief,
|
104 |
+
when they are not inconsistent with each other ( Linden agt. Hepburn, 5 How. Pr.
|
105 |
+
R. 188 ).
|
106 |
+
sentences:
|
107 |
+
- What are the time requirements for challenging a candidate's qualifications to
|
108 |
+
appear on a ballot in Kentucky?
|
109 |
+
- Can legal and equitable claims be united in one action under modern legal practice?
|
110 |
+
- What are the requirements for filing an international patent application?
|
111 |
+
- source_sentence: 'The major points presented by appellants are, first, that the
|
112 |
+
city of Newark took but an easement in the property, second, that if the city
|
113 |
+
did acquire a fee, it was a conditional, base or determinable fee, and, finally,
|
114 |
+
that in either event the use for which the property was condemned has been abandoned
|
115 |
+
and, in consequence, the property has reverted to the former owner. The city responds
|
116 |
+
that, by virtue of the condemnation proceedings, it acquired an estate in fee
|
117 |
+
- simple absolute, the title to which is not subject to any right of reversion,
|
118 |
+
and, furthermore, that even though the city be found to possess only a qualified
|
119 |
+
fee, it may nevertheless devote the land to the street use. * Page 327 It may
|
120 |
+
be said of a municipality, as it was said of a railroad corporation in Currie
|
121 |
+
v. New York Transit Company and National Docks Railway Co., 66 N. J. Eq. 313,
|
122 |
+
that the quantity of interest in land obtained by it under the power of eminent
|
123 |
+
domain is that which the statute conferring the power authorizes it to acquire
|
124 |
+
and that the legislature may authorize the taking of a fee or any less estate
|
125 |
+
in its discretion. The earlier cases were reviewed by our Chief Justice in the
|
126 |
+
opinion written by him for this court in the Currie case and need not be here
|
127 |
+
adverted to in the continued recognition of the enunciated principle. The next
|
128 |
+
question is : What quantity of interest did the statute which conferred the power
|
129 |
+
of eminent domain authorize the city to acquire? The statute is to be read, not
|
130 |
+
under the necessity of finding fixed phraseology, but to ascertain its intent,
|
131 |
+
because this intent, clearly found, will prevail. No precise words are necessary
|
132 |
+
in a statute to authorize the condemnation of a fee. As was said by Mr. Justice
|
133 |
+
Holmes, then a justice of the Supreme Judicial Court of Massachusetts, in City
|
134 |
+
of Newton v. Perry, 163 Mass. 319 ; 39 N. E. Rep. 1032, " there are no sacramental
|
135 |
+
words which must be used in a statutory power to take and hold lands in order
|
136 |
+
to give a right to take the lands in fee. " See, also, Driscoll v. City of New
|
137 |
+
Haven ( Conn. ), 52 Atl.'
|
138 |
+
sentences:
|
139 |
+
- What legal principles govern equality and uniformity in taxation laws?
|
140 |
+
- What determines the type of interest a municipality can acquire through eminent
|
141 |
+
domain?
|
142 |
+
- What are the requirements for filing a patent application in the United States?
|
143 |
+
- source_sentence: . for one year ” ; this was eventually codified as part of G. L.
|
144 |
+
c. 210, § 3, which also specified other grounds for dispensing with parental consent,
|
145 |
+
such as current imprisonment of the parent for more than three years. Chapter
|
146 |
+
593, § 1, of the Acts of 1953, codified as G. L. c. 210, § 3A, first provided
|
147 |
+
for an independent proceeding, prior to adoption proceedings proper, at which
|
148 |
+
it could be determined whether parental consent was to be necessary for the adoption.
|
149 |
+
Its purpose was to facilitate and expedite the process of adoption of children
|
150 |
+
being held in temporary foster care. See the Department of Public Welfare recommendations,
|
151 |
+
1953 House Doc. No. 118, accompanying their draft bill,. 1953 House Doc. No. 124.
|
152 |
+
The proceeding could be brought by the Department of Public Welfare or any appropriate
|
153 |
+
child care agency having custody of the child. But the act was silent as to the
|
154 |
+
standards to be applied in deciding when consent could be dispensed with, and
|
155 |
+
in Consent to Adoption of a Minor, 345 Mass. 706 ( 1963 ), this court held that,
|
156 |
+
in the absence of any other indication in the statute, the conditions set out
|
157 |
+
in § 3 for direct adoptions were still to be met ; specifically, the court held
|
158 |
+
that a finding of parental “ unsuitability, ” without a finding of * 638wilful
|
159 |
+
desertion or neglect for a year, was not an adequate basis for a decree dispensing
|
160 |
+
with the parental consent. The department had evidently not intended the § 3 conditions
|
161 |
+
to be read into the independent § 3A proceeding. Therefore the department immediately
|
162 |
+
sponsored St. 1964, c. 425, which provided that consent could be dispensed with
|
163 |
+
“ if the court finds that the best interests of the child will be served by placement
|
164 |
+
for adoption ” ; the court was not to be restricted by the § 3 conditions, but
|
165 |
+
was to give “ due regard to the ability, capacity and fitness of the child ’ s
|
166 |
+
parents. . . and to the plans proposed by the department or other agency initiating
|
167 |
+
such petition. ” This statute thus broadened the factors the court could consider
|
168 |
+
in deciding whether to proceed over the parent ’ s objections ; unsuitability
|
169 |
+
besides desertion or neglect was now clearly an available ground.
|
170 |
+
sentences:
|
171 |
+
- What are the legal standards for dispensing with parental consent in adoption
|
172 |
+
cases?
|
173 |
+
- What are the tax implications of inheriting property from a deceased relative?
|
174 |
+
- What legal remedies are available when surface water drainage causes damage to
|
175 |
+
private property?
|
176 |
+
pipeline_tag: sentence-similarity
|
177 |
+
library_name: sentence-transformers
|
178 |
+
metrics:
|
179 |
+
- cosine_accuracy
|
180 |
+
model-index:
|
181 |
+
- name: modernbert-embed-base trained on triplets
|
182 |
+
results:
|
183 |
+
- task:
|
184 |
+
type: triplet
|
185 |
+
name: Triplet
|
186 |
+
dataset:
|
187 |
+
name: dev
|
188 |
+
type: dev
|
189 |
+
metrics:
|
190 |
+
- type: cosine_accuracy
|
191 |
+
value: 0.9979550242424011
|
192 |
+
name: Cosine Accuracy
|
193 |
+
- type: cosine_accuracy
|
194 |
+
value: 0.9979550242424011
|
195 |
+
name: Cosine Accuracy
|
196 |
+
---
|
197 |
+
|
198 |
+
# modernbert-embed-base trained on triplets
|
199 |
+
|
200 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base). It maps sentences and paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
201 |
+
|
202 |
+
## Model Details
|
203 |
+
|
204 |
+
### Model Description
|
205 |
+
- **Model Type:** Sentence Transformer
|
206 |
+
- **Base model:** [nomic-ai/modernbert-embed-base](https://huggingface.co/nomic-ai/modernbert-embed-base) <!-- at revision d556a88e332558790b210f7bdbe87da2fa94a8d8 -->
|
207 |
+
- **Maximum Sequence Length:** 8192 tokens
|
208 |
+
- **Output Dimensionality:** 768 dimensions
|
209 |
+
- **Similarity Function:** Cosine Similarity
|
210 |
+
<!-- - **Training Dataset:** Unknown -->
|
211 |
+
- **Language:** en
|
212 |
+
- **License:** apache-2.0
|
213 |
+
|
214 |
+
### Model Sources
|
215 |
+
|
216 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
217 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
218 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
219 |
+
|
220 |
+
### Full Model Architecture
|
221 |
+
|
222 |
+
```
|
223 |
+
SentenceTransformer(
|
224 |
+
(0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: ModernBertModel
|
225 |
+
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
226 |
+
(2): Normalize()
|
227 |
+
)
|
228 |
+
```
|
229 |
+
|
230 |
+
## Usage
|
231 |
+
|
232 |
+
### Direct Usage (Sentence Transformers)
|
233 |
+
|
234 |
+
First install the Sentence Transformers library:
|
235 |
+
|
236 |
+
```bash
|
237 |
+
pip install -U sentence-transformers
|
238 |
+
```
|
239 |
+
|
240 |
+
Then you can load this model and run inference.
|
241 |
+
```python
|
242 |
+
from sentence_transformers import SentenceTransformer
|
243 |
+
|
244 |
+
# Download from the 🤗 Hub
|
245 |
+
model = SentenceTransformer("rachelFLP/modernbert-embed-base_triples")
|
246 |
+
# Run inference
|
247 |
+
sentences = [
|
248 |
+
'. for one year ” ; this was eventually codified as part of G. L. c. 210, § 3, which also specified other grounds for dispensing with parental consent, such as current imprisonment of the parent for more than three years. Chapter 593, § 1, of the Acts of 1953, codified as G. L. c. 210, § 3A, first provided for an independent proceeding, prior to adoption proceedings proper, at which it could be determined whether parental consent was to be necessary for the adoption. Its purpose was to facilitate and expedite the process of adoption of children being held in temporary foster care. See the Department of Public Welfare recommendations, 1953 House Doc. No. 118, accompanying their draft bill,. 1953 House Doc. No. 124. The proceeding could be brought by the Department of Public Welfare or any appropriate child care agency having custody of the child. But the act was silent as to the standards to be applied in deciding when consent could be dispensed with, and in Consent to Adoption of a Minor, 345 Mass. 706 ( 1963 ), this court held that, in the absence of any other indication in the statute, the conditions set out in § 3 for direct adoptions were still to be met ; specifically, the court held that a finding of parental “ unsuitability, ” without a finding of * 638wilful desertion or neglect for a year, was not an adequate basis for a decree dispensing with the parental consent. The department had evidently not intended the § 3 conditions to be read into the independent § 3A proceeding. Therefore the department immediately sponsored St. 1964, c. 425, which provided that consent could be dispensed with “ if the court finds that the best interests of the child will be served by placement for adoption ” ; the court was not to be restricted by the § 3 conditions, but was to give “ due regard to the ability, capacity and fitness of the child ’ s parents. . . and to the plans proposed by the department or other agency initiating such petition. ” This statute thus broadened the factors the court could consider in deciding whether to proceed over the parent ’ s objections ; unsuitability besides desertion or neglect was now clearly an available ground.',
|
249 |
+
'What are the legal standards for dispensing with parental consent in adoption cases?',
|
250 |
+
'What are the tax implications of inheriting property from a deceased relative?',
|
251 |
+
]
|
252 |
+
embeddings = model.encode(sentences)
|
253 |
+
print(embeddings.shape)
|
254 |
+
# [3, 768]
|
255 |
+
|
256 |
+
# Get the similarity scores for the embeddings
|
257 |
+
similarities = model.similarity(embeddings, embeddings)
|
258 |
+
print(similarities.shape)
|
259 |
+
# [3, 3]
|
260 |
+
```
|
261 |
+
|
262 |
+
<!--
|
263 |
+
### Direct Usage (Transformers)
|
264 |
+
|
265 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
266 |
+
|
267 |
+
</details>
|
268 |
+
-->
|
269 |
+
|
270 |
+
<!--
|
271 |
+
### Downstream Usage (Sentence Transformers)
|
272 |
+
|
273 |
+
You can finetune this model on your own dataset.
|
274 |
+
|
275 |
+
<details><summary>Click to expand</summary>
|
276 |
+
|
277 |
+
</details>
|
278 |
+
-->
|
279 |
+
|
280 |
+
<!--
|
281 |
+
### Out-of-Scope Use
|
282 |
+
|
283 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
284 |
+
-->
|
285 |
+
|
286 |
+
## Evaluation
|
287 |
+
|
288 |
+
### Metrics
|
289 |
+
|
290 |
+
#### Triplet
|
291 |
+
|
292 |
+
* Dataset: `dev`
|
293 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
294 |
+
|
295 |
+
| Metric | Value |
|
296 |
+
|:--------------------|:----------|
|
297 |
+
| **cosine_accuracy** | **0.998** |
|
298 |
+
|
299 |
+
#### Triplet
|
300 |
+
|
301 |
+
* Dataset: `dev`
|
302 |
+
* Evaluated with [<code>TripletEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.TripletEvaluator)
|
303 |
+
|
304 |
+
| Metric | Value |
|
305 |
+
|:--------------------|:----------|
|
306 |
+
| **cosine_accuracy** | **0.998** |
|
307 |
+
|
308 |
+
<!--
|
309 |
+
## Bias, Risks and Limitations
|
310 |
+
|
311 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
312 |
+
-->
|
313 |
+
|
314 |
+
<!--
|
315 |
+
### Recommendations
|
316 |
+
|
317 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
318 |
+
-->
|
319 |
+
|
320 |
+
## Training Details
|
321 |
+
|
322 |
+
### Training Dataset
|
323 |
+
|
324 |
+
#### Unnamed Dataset
|
325 |
+
|
326 |
+
* Size: 2,828 training samples
|
327 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
328 |
+
* Approximate statistics based on the first 1000 samples:
|
329 |
+
| | anchor | positive | negative |
|
330 |
+
|:--------|:-------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
331 |
+
| type | string | string | string |
|
332 |
+
| details | <ul><li>min: 29 tokens</li><li>mean: 403.51 tokens</li><li>max: 483 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 17.59 tokens</li><li>max: 30 tokens</li></ul> | <ul><li>min: 10 tokens</li><li>mean: 14.47 tokens</li><li>max: 23 tokens</li></ul> |
|
333 |
+
* Samples:
|
334 |
+
| anchor | positive | negative |
|
335 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------|
|
336 |
+
| <code>DISTRICT COURT OF APPEAL OF THE STATE OF FLORIDA FOURTH DISTRICT EURICE McGILL, Appellant, v. STATE OF FLORIDA, Appellee. No. 4D17 - 1492 [ August 31, 2017 ] Appeal of order denying rule 3. 850 motion from the Circuit Court for the Seventeenth Judicial Circuit, Broward County ; Paul L. Backman, Judge ; L. T. Case No. 10 - 12523CF10A. Eurice McGill, Lake City, pro se. No appearance required for appellee. PER CURIAM. Affirmed. WARNER, DAMOORGIAN and KUNTZ, JJ., concur. * * * Not final until disposition of timely filed motion for rehearing.</code> | <code>What are the procedural outcomes of appealing a denied rule 3.850 motion in Florida?</code> | <code>What are the tax implications of forming an LLC in Florida?</code> |
|
337 |
+
| <code>Twersky v Incorporated Vil. of Great Neck ( 2015 NY Slip Op 02755 ) Twersky v Incorporated Vil. of Great Neck 2015 NY Slip Op 02755 Decided on April 1, 2015 Appellate Division, Second Department Published by New York State Law Reporting Bureau pursuant to Judiciary Law § 431. This opinion is uncorrected and subject to revision before publication in the Official Reports. Decided on April 1, 2015 SUPREME COURT OF THE STATE OF NEW YORK Appellate Division, Second Judicial Department RANDALL T. ENG, P. J. LEONARD B. AUSTIN JEFFREY A. COHEN BETSY BARROS, JJ. 2014 - 07552 ( Index No. 9576 / 12 ) [ * 1 ] Sharon Twersky, respondent, v Incorporated Village of Great Neck, et al., defendants, FHM Mortgage Corp., et al., appellants. Cascone & Kluepfel, LLP, Garden City, N. Y. ( Howard B. Altman of counsel ), for appellants. Isaacson, Schiowitz & Korson, LLP, Rockville Centre, N. Y. ( Jeremy Schiowitz of counsel ), for respondent. DECISION & ORDER In an action to recover damages for personal injurie...</code> | <code>What is the appellate court's role in reviewing motions for summary judgment in personal injury cases?</code> | <code>What are the tax implications of selling real estate in New York?</code> |
|
338 |
+
| <code>), entered June 17, 2014, as denied their motion for summary judgment dismissing the complaint and all cross claims insofar as asserted against them. ORDERED that the order is affirmed insofar as appealed from, with costs. On the evening of November 18, 2011, the plaintiff, while walking on a sidewalk abutting property then owned by the defendants FHM Mortgage Corp. and Killer B ' s Realty Holding Corp. ( hereinafter together the appellants ), allegedly slipped and fell on a driveway apron covered by a blanket of wet and slimy leaves. The plaintiff testified at her deposition that it was very dark in the area where the accident occurred and that the lamp posts in the vicinity did not provide much illumination. She also testified that the portion of the apron on which she slipped sloped down to meet the driveway. The appellants moved for summary judgment dismissing the complaint and all cross claims insofar as asserted against them. The Supreme Court denied their motion. A property owne...</code> | <code>What is the legal responsibility of property owners for maintaining a safe environment on their premises?</code> | <code>What are the tax implications of selling real estate property for a profit?</code> |
|
339 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
340 |
+
```json
|
341 |
+
{
|
342 |
+
"scale": 20.0,
|
343 |
+
"similarity_fct": "cos_sim"
|
344 |
+
}
|
345 |
+
```
|
346 |
+
|
347 |
+
### Evaluation Dataset
|
348 |
+
|
349 |
+
#### Unnamed Dataset
|
350 |
+
|
351 |
+
* Size: 489 evaluation samples
|
352 |
+
* Columns: <code>anchor</code>, <code>positive</code>, and <code>negative</code>
|
353 |
+
* Approximate statistics based on the first 489 samples:
|
354 |
+
| | anchor | positive | negative |
|
355 |
+
|:--------|:-------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
356 |
+
| type | string | string | string |
|
357 |
+
| details | <ul><li>min: 19 tokens</li><li>mean: 397.16 tokens</li><li>max: 478 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 18.1 tokens</li><li>max: 31 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 14.69 tokens</li><li>max: 22 tokens</li></ul> |
|
358 |
+
* Samples:
|
359 |
+
| anchor | positive | negative |
|
360 |
+
|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------|
|
361 |
+
| <code>Mr. Justice Mercur delivered the opinion of the court, November 20th 1882. Both parties claim title to this land under sheriff ’ s sale as the property of James Strouss. The defendant purchased at a sale made in December 1815, the plaintiff at one made in March 1880. The plaintiff seeks to impeach the validity of the first sale * 411on the ground that it was made in fraud of the creditors of Strouss. The law presumes that a public judicial sale is made in good faith. This presumption stands, unless overthrown by clear and satisfactory evidence of fraud or unfair means. The contention was one of fact. Much evidence Avas given bearing on the question, and some of it conflicting. The learned judge submitted the case to the jury in a clear and correct charge. He instructed them that if the sheriff ’ s sale was made with the intention of hindering, delaying or defeating creditors, and the purchaser had knowledge of such, it was null and void, although the full value of the property may have...</code> | <code>What constitutes fraud in a sheriff’s sale and how does it affect property titles?</code> | <code>What are the requirements for filing a patent application in the United States?</code> |
|
362 |
+
| <code>We think the plaintiff has no reason to complain of this declaration of the law. No error is assigned thereto. Then, as to the application of the evidence tending to establish the fraud, the court affirmed a point of the plaintiff put in these words, “ under the plaintiff ’ s evidence tending to prove fraud on the part of the defendant, the jury will consider all the separate facts in evidence, whether each fact of itself would be sufficient or not to fasten fraud on her in the premises ; and they may consider separate facts, if they are connected by the evidence and tend to prove that the [ defendant entered into and carried out a scheme or plan, to purchase the land in dispute at an under value, and for the benefit of herself, and also for the benefit of James Strouss or his family. ” We do not deem it necessary to consider seriatim the twenty - five specifications of error. We do not think the article of agreement Avas prima facie fraudulent as to creditors ; nor do we see any error...</code> | <code>What legal principles govern the consideration of fraud in contracts involving property disputes?</code> | <code>What are the tax implications of selling inherited property in the United States?</code> |
|
363 |
+
| <code>217 N. J. Super. 541 ( 1987 ) 526 A. 2d 290 ALAN C. STAVER, PLAINTIFF, v. MARGARET STAVER, DEFENDANT. Superior Court of New Jersey, Chancery Division Bergen County, Family Part. March 11, 1987. * 543 Donald L. Garber for plaintiff ( Donald L. Garber, attorney ; Michael I. Lubin on the brief ). John Fiorello for defendant ( Feldman, Feldman, Hoffman & Fiorello, attorneys ). SIMON, MARGUERITE T., J. S. C. Plaintiff husband brings this motion seeking to terminate his obligation to pay alimony to defendant pursuant to a judgment of divorce entered September 6, 1974. Defendant wife brings a cross - motion for enforcement of the judgment. At the time of the entry of the final judgment, plaintiff was employed as an ordained minister earning approximately $ 12, 000 a year. The parties entered into a consensual agreement which was incorporated into the judgment. Two pertinent stipulations of the agreement are as follows : ( 1 ) " Said alimony of $ 500 per month shall continue in effect regardle...</code> | <code>Can alimony obligations be modified or terminated based on retirement and financial changes?</code> | <code>What are the tax implications of inheriting property in New Jersey?</code> |
|
364 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
365 |
+
```json
|
366 |
+
{
|
367 |
+
"scale": 20.0,
|
368 |
+
"similarity_fct": "cos_sim"
|
369 |
+
}
|
370 |
+
```
|
371 |
+
|
372 |
+
### Training Hyperparameters
|
373 |
+
#### Non-Default Hyperparameters
|
374 |
+
|
375 |
+
- `eval_strategy`: steps
|
376 |
+
- `per_device_train_batch_size`: 16
|
377 |
+
- `per_device_eval_batch_size`: 16
|
378 |
+
- `learning_rate`: 2e-05
|
379 |
+
- `num_train_epochs`: 1
|
380 |
+
- `warmup_ratio`: 0.1
|
381 |
+
- `fp16`: True
|
382 |
+
- `batch_sampler`: no_duplicates
|
383 |
+
|
384 |
+
#### All Hyperparameters
|
385 |
+
<details><summary>Click to expand</summary>
|
386 |
+
|
387 |
+
- `overwrite_output_dir`: False
|
388 |
+
- `do_predict`: False
|
389 |
+
- `eval_strategy`: steps
|
390 |
+
- `prediction_loss_only`: True
|
391 |
+
- `per_device_train_batch_size`: 16
|
392 |
+
- `per_device_eval_batch_size`: 16
|
393 |
+
- `per_gpu_train_batch_size`: None
|
394 |
+
- `per_gpu_eval_batch_size`: None
|
395 |
+
- `gradient_accumulation_steps`: 1
|
396 |
+
- `eval_accumulation_steps`: None
|
397 |
+
- `torch_empty_cache_steps`: None
|
398 |
+
- `learning_rate`: 2e-05
|
399 |
+
- `weight_decay`: 0.0
|
400 |
+
- `adam_beta1`: 0.9
|
401 |
+
- `adam_beta2`: 0.999
|
402 |
+
- `adam_epsilon`: 1e-08
|
403 |
+
- `max_grad_norm`: 1.0
|
404 |
+
- `num_train_epochs`: 1
|
405 |
+
- `max_steps`: -1
|
406 |
+
- `lr_scheduler_type`: linear
|
407 |
+
- `lr_scheduler_kwargs`: {}
|
408 |
+
- `warmup_ratio`: 0.1
|
409 |
+
- `warmup_steps`: 0
|
410 |
+
- `log_level`: passive
|
411 |
+
- `log_level_replica`: warning
|
412 |
+
- `log_on_each_node`: True
|
413 |
+
- `logging_nan_inf_filter`: True
|
414 |
+
- `save_safetensors`: True
|
415 |
+
- `save_on_each_node`: False
|
416 |
+
- `save_only_model`: False
|
417 |
+
- `restore_callback_states_from_checkpoint`: False
|
418 |
+
- `no_cuda`: False
|
419 |
+
- `use_cpu`: False
|
420 |
+
- `use_mps_device`: False
|
421 |
+
- `seed`: 42
|
422 |
+
- `data_seed`: None
|
423 |
+
- `jit_mode_eval`: False
|
424 |
+
- `use_ipex`: False
|
425 |
+
- `bf16`: False
|
426 |
+
- `fp16`: True
|
427 |
+
- `fp16_opt_level`: O1
|
428 |
+
- `half_precision_backend`: auto
|
429 |
+
- `bf16_full_eval`: False
|
430 |
+
- `fp16_full_eval`: False
|
431 |
+
- `tf32`: None
|
432 |
+
- `local_rank`: 0
|
433 |
+
- `ddp_backend`: None
|
434 |
+
- `tpu_num_cores`: None
|
435 |
+
- `tpu_metrics_debug`: False
|
436 |
+
- `debug`: []
|
437 |
+
- `dataloader_drop_last`: False
|
438 |
+
- `dataloader_num_workers`: 0
|
439 |
+
- `dataloader_prefetch_factor`: None
|
440 |
+
- `past_index`: -1
|
441 |
+
- `disable_tqdm`: False
|
442 |
+
- `remove_unused_columns`: True
|
443 |
+
- `label_names`: None
|
444 |
+
- `load_best_model_at_end`: False
|
445 |
+
- `ignore_data_skip`: False
|
446 |
+
- `fsdp`: []
|
447 |
+
- `fsdp_min_num_params`: 0
|
448 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
449 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
450 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
451 |
+
- `deepspeed`: None
|
452 |
+
- `label_smoothing_factor`: 0.0
|
453 |
+
- `optim`: adamw_torch
|
454 |
+
- `optim_args`: None
|
455 |
+
- `adafactor`: False
|
456 |
+
- `group_by_length`: False
|
457 |
+
- `length_column_name`: length
|
458 |
+
- `ddp_find_unused_parameters`: None
|
459 |
+
- `ddp_bucket_cap_mb`: None
|
460 |
+
- `ddp_broadcast_buffers`: False
|
461 |
+
- `dataloader_pin_memory`: True
|
462 |
+
- `dataloader_persistent_workers`: False
|
463 |
+
- `skip_memory_metrics`: True
|
464 |
+
- `use_legacy_prediction_loop`: False
|
465 |
+
- `push_to_hub`: False
|
466 |
+
- `resume_from_checkpoint`: None
|
467 |
+
- `hub_model_id`: None
|
468 |
+
- `hub_strategy`: every_save
|
469 |
+
- `hub_private_repo`: None
|
470 |
+
- `hub_always_push`: False
|
471 |
+
- `gradient_checkpointing`: False
|
472 |
+
- `gradient_checkpointing_kwargs`: None
|
473 |
+
- `include_inputs_for_metrics`: False
|
474 |
+
- `include_for_metrics`: []
|
475 |
+
- `eval_do_concat_batches`: True
|
476 |
+
- `fp16_backend`: auto
|
477 |
+
- `push_to_hub_model_id`: None
|
478 |
+
- `push_to_hub_organization`: None
|
479 |
+
- `mp_parameters`:
|
480 |
+
- `auto_find_batch_size`: False
|
481 |
+
- `full_determinism`: False
|
482 |
+
- `torchdynamo`: None
|
483 |
+
- `ray_scope`: last
|
484 |
+
- `ddp_timeout`: 1800
|
485 |
+
- `torch_compile`: False
|
486 |
+
- `torch_compile_backend`: None
|
487 |
+
- `torch_compile_mode`: None
|
488 |
+
- `dispatch_batches`: None
|
489 |
+
- `split_batches`: None
|
490 |
+
- `include_tokens_per_second`: False
|
491 |
+
- `include_num_input_tokens_seen`: False
|
492 |
+
- `neftune_noise_alpha`: None
|
493 |
+
- `optim_target_modules`: None
|
494 |
+
- `batch_eval_metrics`: False
|
495 |
+
- `eval_on_start`: False
|
496 |
+
- `use_liger_kernel`: False
|
497 |
+
- `eval_use_gather_object`: False
|
498 |
+
- `average_tokens_across_devices`: False
|
499 |
+
- `prompts`: None
|
500 |
+
- `batch_sampler`: no_duplicates
|
501 |
+
- `multi_dataset_batch_sampler`: proportional
|
502 |
+
|
503 |
+
</details>
|
504 |
+
|
505 |
+
### Training Logs
|
506 |
+
| Epoch | Step | Validation Loss | dev_cosine_accuracy |
|
507 |
+
|:------:|:----:|:---------------:|:-------------------:|
|
508 |
+
| -1 | -1 | - | 0.9980 |
|
509 |
+
| 0.5650 | 100 | 0.1305 | 0.9980 |
|
510 |
+
| -1 | -1 | - | 0.9980 |
|
511 |
+
|
512 |
+
|
513 |
+
### Framework Versions
|
514 |
+
- Python: 3.11.11
|
515 |
+
- Sentence Transformers: 3.4.1
|
516 |
+
- Transformers: 4.48.3
|
517 |
+
- PyTorch: 2.5.1+cu124
|
518 |
+
- Accelerate: 1.3.0
|
519 |
+
- Datasets: 3.3.2
|
520 |
+
- Tokenizers: 0.21.0
|
521 |
+
|
522 |
+
## Citation
|
523 |
+
|
524 |
+
### BibTeX
|
525 |
+
|
526 |
+
#### Sentence Transformers
|
527 |
+
```bibtex
|
528 |
+
@inproceedings{reimers-2019-sentence-bert,
|
529 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
530 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
531 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
532 |
+
month = "11",
|
533 |
+
year = "2019",
|
534 |
+
publisher = "Association for Computational Linguistics",
|
535 |
+
url = "https://arxiv.org/abs/1908.10084",
|
536 |
+
}
|
537 |
+
```
|
538 |
+
|
539 |
+
#### MultipleNegativesRankingLoss
|
540 |
+
```bibtex
|
541 |
+
@misc{henderson2017efficient,
|
542 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
543 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
544 |
+
year={2017},
|
545 |
+
eprint={1705.00652},
|
546 |
+
archivePrefix={arXiv},
|
547 |
+
primaryClass={cs.CL}
|
548 |
+
}
|
549 |
+
```
|
550 |
+
|
551 |
+
<!--
|
552 |
+
## Glossary
|
553 |
+
|
554 |
+
*Clearly define terms in order to be accessible across audiences.*
|
555 |
+
-->
|
556 |
+
|
557 |
+
<!--
|
558 |
+
## Model Card Authors
|
559 |
+
|
560 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
561 |
+
-->
|
562 |
+
|
563 |
+
<!--
|
564 |
+
## Model Card Contact
|
565 |
+
|
566 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
567 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "nomic-ai/modernbert-embed-base",
|
3 |
+
"architectures": [
|
4 |
+
"ModernBertModel"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 50281,
|
9 |
+
"classifier_activation": "gelu",
|
10 |
+
"classifier_bias": false,
|
11 |
+
"classifier_dropout": 0.0,
|
12 |
+
"classifier_pooling": "mean",
|
13 |
+
"cls_token_id": 50281,
|
14 |
+
"decoder_bias": true,
|
15 |
+
"deterministic_flash_attn": false,
|
16 |
+
"embedding_dropout": 0.0,
|
17 |
+
"eos_token_id": 50282,
|
18 |
+
"global_attn_every_n_layers": 3,
|
19 |
+
"global_rope_theta": 160000.0,
|
20 |
+
"gradient_checkpointing": false,
|
21 |
+
"hidden_activation": "gelu",
|
22 |
+
"hidden_size": 768,
|
23 |
+
"initializer_cutoff_factor": 2.0,
|
24 |
+
"initializer_range": 0.02,
|
25 |
+
"intermediate_size": 1152,
|
26 |
+
"layer_norm_eps": 1e-05,
|
27 |
+
"local_attention": 128,
|
28 |
+
"local_rope_theta": 10000.0,
|
29 |
+
"max_position_embeddings": 8192,
|
30 |
+
"mlp_bias": false,
|
31 |
+
"mlp_dropout": 0.0,
|
32 |
+
"model_type": "modernbert",
|
33 |
+
"norm_bias": false,
|
34 |
+
"norm_eps": 1e-05,
|
35 |
+
"num_attention_heads": 12,
|
36 |
+
"num_hidden_layers": 22,
|
37 |
+
"pad_token_id": 50283,
|
38 |
+
"position_embedding_type": "absolute",
|
39 |
+
"reference_compile": true,
|
40 |
+
"repad_logits_with_grad": false,
|
41 |
+
"sep_token_id": 50282,
|
42 |
+
"sparse_pred_ignore_index": -100,
|
43 |
+
"sparse_prediction": false,
|
44 |
+
"torch_dtype": "float32",
|
45 |
+
"transformers_version": "4.48.3",
|
46 |
+
"vocab_size": 50368
|
47 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.4.1",
|
4 |
+
"transformers": "4.48.3",
|
5 |
+
"pytorch": "2.5.1+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8f7d8db881a2f6c1e8c22aeb065edfa05bee5c1a9aa3383fd8ce7c4f0454a0a9
|
3 |
+
size 596070136
|
modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 8192,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": true,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,945 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "|||IP_ADDRESS|||",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": true,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": false
|
10 |
+
},
|
11 |
+
"1": {
|
12 |
+
"content": "<|padding|>",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"50254": {
|
20 |
+
"content": " ",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": true,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": false
|
26 |
+
},
|
27 |
+
"50255": {
|
28 |
+
"content": " ",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": true,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": false
|
34 |
+
},
|
35 |
+
"50256": {
|
36 |
+
"content": " ",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": true,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": false
|
42 |
+
},
|
43 |
+
"50257": {
|
44 |
+
"content": " ",
|
45 |
+
"lstrip": false,
|
46 |
+
"normalized": true,
|
47 |
+
"rstrip": false,
|
48 |
+
"single_word": false,
|
49 |
+
"special": false
|
50 |
+
},
|
51 |
+
"50258": {
|
52 |
+
"content": " ",
|
53 |
+
"lstrip": false,
|
54 |
+
"normalized": true,
|
55 |
+
"rstrip": false,
|
56 |
+
"single_word": false,
|
57 |
+
"special": false
|
58 |
+
},
|
59 |
+
"50259": {
|
60 |
+
"content": " ",
|
61 |
+
"lstrip": false,
|
62 |
+
"normalized": true,
|
63 |
+
"rstrip": false,
|
64 |
+
"single_word": false,
|
65 |
+
"special": false
|
66 |
+
},
|
67 |
+
"50260": {
|
68 |
+
"content": " ",
|
69 |
+
"lstrip": false,
|
70 |
+
"normalized": true,
|
71 |
+
"rstrip": false,
|
72 |
+
"single_word": false,
|
73 |
+
"special": false
|
74 |
+
},
|
75 |
+
"50261": {
|
76 |
+
"content": " ",
|
77 |
+
"lstrip": false,
|
78 |
+
"normalized": true,
|
79 |
+
"rstrip": false,
|
80 |
+
"single_word": false,
|
81 |
+
"special": false
|
82 |
+
},
|
83 |
+
"50262": {
|
84 |
+
"content": " ",
|
85 |
+
"lstrip": false,
|
86 |
+
"normalized": true,
|
87 |
+
"rstrip": false,
|
88 |
+
"single_word": false,
|
89 |
+
"special": false
|
90 |
+
},
|
91 |
+
"50263": {
|
92 |
+
"content": " ",
|
93 |
+
"lstrip": false,
|
94 |
+
"normalized": true,
|
95 |
+
"rstrip": false,
|
96 |
+
"single_word": false,
|
97 |
+
"special": false
|
98 |
+
},
|
99 |
+
"50264": {
|
100 |
+
"content": " ",
|
101 |
+
"lstrip": false,
|
102 |
+
"normalized": true,
|
103 |
+
"rstrip": false,
|
104 |
+
"single_word": false,
|
105 |
+
"special": false
|
106 |
+
},
|
107 |
+
"50265": {
|
108 |
+
"content": " ",
|
109 |
+
"lstrip": false,
|
110 |
+
"normalized": true,
|
111 |
+
"rstrip": false,
|
112 |
+
"single_word": false,
|
113 |
+
"special": false
|
114 |
+
},
|
115 |
+
"50266": {
|
116 |
+
"content": " ",
|
117 |
+
"lstrip": false,
|
118 |
+
"normalized": true,
|
119 |
+
"rstrip": false,
|
120 |
+
"single_word": false,
|
121 |
+
"special": false
|
122 |
+
},
|
123 |
+
"50267": {
|
124 |
+
"content": " ",
|
125 |
+
"lstrip": false,
|
126 |
+
"normalized": true,
|
127 |
+
"rstrip": false,
|
128 |
+
"single_word": false,
|
129 |
+
"special": false
|
130 |
+
},
|
131 |
+
"50268": {
|
132 |
+
"content": " ",
|
133 |
+
"lstrip": false,
|
134 |
+
"normalized": true,
|
135 |
+
"rstrip": false,
|
136 |
+
"single_word": false,
|
137 |
+
"special": false
|
138 |
+
},
|
139 |
+
"50269": {
|
140 |
+
"content": " ",
|
141 |
+
"lstrip": false,
|
142 |
+
"normalized": true,
|
143 |
+
"rstrip": false,
|
144 |
+
"single_word": false,
|
145 |
+
"special": false
|
146 |
+
},
|
147 |
+
"50270": {
|
148 |
+
"content": " ",
|
149 |
+
"lstrip": false,
|
150 |
+
"normalized": true,
|
151 |
+
"rstrip": false,
|
152 |
+
"single_word": false,
|
153 |
+
"special": false
|
154 |
+
},
|
155 |
+
"50271": {
|
156 |
+
"content": " ",
|
157 |
+
"lstrip": false,
|
158 |
+
"normalized": true,
|
159 |
+
"rstrip": false,
|
160 |
+
"single_word": false,
|
161 |
+
"special": false
|
162 |
+
},
|
163 |
+
"50272": {
|
164 |
+
"content": " ",
|
165 |
+
"lstrip": false,
|
166 |
+
"normalized": true,
|
167 |
+
"rstrip": false,
|
168 |
+
"single_word": false,
|
169 |
+
"special": false
|
170 |
+
},
|
171 |
+
"50273": {
|
172 |
+
"content": " ",
|
173 |
+
"lstrip": false,
|
174 |
+
"normalized": true,
|
175 |
+
"rstrip": false,
|
176 |
+
"single_word": false,
|
177 |
+
"special": false
|
178 |
+
},
|
179 |
+
"50274": {
|
180 |
+
"content": " ",
|
181 |
+
"lstrip": false,
|
182 |
+
"normalized": true,
|
183 |
+
"rstrip": false,
|
184 |
+
"single_word": false,
|
185 |
+
"special": false
|
186 |
+
},
|
187 |
+
"50275": {
|
188 |
+
"content": " ",
|
189 |
+
"lstrip": false,
|
190 |
+
"normalized": true,
|
191 |
+
"rstrip": false,
|
192 |
+
"single_word": false,
|
193 |
+
"special": false
|
194 |
+
},
|
195 |
+
"50276": {
|
196 |
+
"content": " ",
|
197 |
+
"lstrip": false,
|
198 |
+
"normalized": true,
|
199 |
+
"rstrip": false,
|
200 |
+
"single_word": false,
|
201 |
+
"special": false
|
202 |
+
},
|
203 |
+
"50277": {
|
204 |
+
"content": "|||EMAIL_ADDRESS|||",
|
205 |
+
"lstrip": false,
|
206 |
+
"normalized": true,
|
207 |
+
"rstrip": false,
|
208 |
+
"single_word": false,
|
209 |
+
"special": false
|
210 |
+
},
|
211 |
+
"50278": {
|
212 |
+
"content": "|||PHONE_NUMBER|||",
|
213 |
+
"lstrip": false,
|
214 |
+
"normalized": true,
|
215 |
+
"rstrip": false,
|
216 |
+
"single_word": false,
|
217 |
+
"special": false
|
218 |
+
},
|
219 |
+
"50279": {
|
220 |
+
"content": "<|endoftext|>",
|
221 |
+
"lstrip": false,
|
222 |
+
"normalized": false,
|
223 |
+
"rstrip": false,
|
224 |
+
"single_word": false,
|
225 |
+
"special": true
|
226 |
+
},
|
227 |
+
"50280": {
|
228 |
+
"content": "[UNK]",
|
229 |
+
"lstrip": false,
|
230 |
+
"normalized": false,
|
231 |
+
"rstrip": false,
|
232 |
+
"single_word": false,
|
233 |
+
"special": true
|
234 |
+
},
|
235 |
+
"50281": {
|
236 |
+
"content": "[CLS]",
|
237 |
+
"lstrip": false,
|
238 |
+
"normalized": false,
|
239 |
+
"rstrip": false,
|
240 |
+
"single_word": false,
|
241 |
+
"special": true
|
242 |
+
},
|
243 |
+
"50282": {
|
244 |
+
"content": "[SEP]",
|
245 |
+
"lstrip": false,
|
246 |
+
"normalized": false,
|
247 |
+
"rstrip": false,
|
248 |
+
"single_word": false,
|
249 |
+
"special": true
|
250 |
+
},
|
251 |
+
"50283": {
|
252 |
+
"content": "[PAD]",
|
253 |
+
"lstrip": false,
|
254 |
+
"normalized": false,
|
255 |
+
"rstrip": false,
|
256 |
+
"single_word": false,
|
257 |
+
"special": true
|
258 |
+
},
|
259 |
+
"50284": {
|
260 |
+
"content": "[MASK]",
|
261 |
+
"lstrip": true,
|
262 |
+
"normalized": false,
|
263 |
+
"rstrip": false,
|
264 |
+
"single_word": false,
|
265 |
+
"special": true
|
266 |
+
},
|
267 |
+
"50285": {
|
268 |
+
"content": "[unused0]",
|
269 |
+
"lstrip": false,
|
270 |
+
"normalized": true,
|
271 |
+
"rstrip": false,
|
272 |
+
"single_word": false,
|
273 |
+
"special": false
|
274 |
+
},
|
275 |
+
"50286": {
|
276 |
+
"content": "[unused1]",
|
277 |
+
"lstrip": false,
|
278 |
+
"normalized": true,
|
279 |
+
"rstrip": false,
|
280 |
+
"single_word": false,
|
281 |
+
"special": false
|
282 |
+
},
|
283 |
+
"50287": {
|
284 |
+
"content": "[unused2]",
|
285 |
+
"lstrip": false,
|
286 |
+
"normalized": true,
|
287 |
+
"rstrip": false,
|
288 |
+
"single_word": false,
|
289 |
+
"special": false
|
290 |
+
},
|
291 |
+
"50288": {
|
292 |
+
"content": "[unused3]",
|
293 |
+
"lstrip": false,
|
294 |
+
"normalized": true,
|
295 |
+
"rstrip": false,
|
296 |
+
"single_word": false,
|
297 |
+
"special": false
|
298 |
+
},
|
299 |
+
"50289": {
|
300 |
+
"content": "[unused4]",
|
301 |
+
"lstrip": false,
|
302 |
+
"normalized": true,
|
303 |
+
"rstrip": false,
|
304 |
+
"single_word": false,
|
305 |
+
"special": false
|
306 |
+
},
|
307 |
+
"50290": {
|
308 |
+
"content": "[unused5]",
|
309 |
+
"lstrip": false,
|
310 |
+
"normalized": true,
|
311 |
+
"rstrip": false,
|
312 |
+
"single_word": false,
|
313 |
+
"special": false
|
314 |
+
},
|
315 |
+
"50291": {
|
316 |
+
"content": "[unused6]",
|
317 |
+
"lstrip": false,
|
318 |
+
"normalized": true,
|
319 |
+
"rstrip": false,
|
320 |
+
"single_word": false,
|
321 |
+
"special": false
|
322 |
+
},
|
323 |
+
"50292": {
|
324 |
+
"content": "[unused7]",
|
325 |
+
"lstrip": false,
|
326 |
+
"normalized": true,
|
327 |
+
"rstrip": false,
|
328 |
+
"single_word": false,
|
329 |
+
"special": false
|
330 |
+
},
|
331 |
+
"50293": {
|
332 |
+
"content": "[unused8]",
|
333 |
+
"lstrip": false,
|
334 |
+
"normalized": true,
|
335 |
+
"rstrip": false,
|
336 |
+
"single_word": false,
|
337 |
+
"special": false
|
338 |
+
},
|
339 |
+
"50294": {
|
340 |
+
"content": "[unused9]",
|
341 |
+
"lstrip": false,
|
342 |
+
"normalized": true,
|
343 |
+
"rstrip": false,
|
344 |
+
"single_word": false,
|
345 |
+
"special": false
|
346 |
+
},
|
347 |
+
"50295": {
|
348 |
+
"content": "[unused10]",
|
349 |
+
"lstrip": false,
|
350 |
+
"normalized": true,
|
351 |
+
"rstrip": false,
|
352 |
+
"single_word": false,
|
353 |
+
"special": false
|
354 |
+
},
|
355 |
+
"50296": {
|
356 |
+
"content": "[unused11]",
|
357 |
+
"lstrip": false,
|
358 |
+
"normalized": true,
|
359 |
+
"rstrip": false,
|
360 |
+
"single_word": false,
|
361 |
+
"special": false
|
362 |
+
},
|
363 |
+
"50297": {
|
364 |
+
"content": "[unused12]",
|
365 |
+
"lstrip": false,
|
366 |
+
"normalized": true,
|
367 |
+
"rstrip": false,
|
368 |
+
"single_word": false,
|
369 |
+
"special": false
|
370 |
+
},
|
371 |
+
"50298": {
|
372 |
+
"content": "[unused13]",
|
373 |
+
"lstrip": false,
|
374 |
+
"normalized": true,
|
375 |
+
"rstrip": false,
|
376 |
+
"single_word": false,
|
377 |
+
"special": false
|
378 |
+
},
|
379 |
+
"50299": {
|
380 |
+
"content": "[unused14]",
|
381 |
+
"lstrip": false,
|
382 |
+
"normalized": true,
|
383 |
+
"rstrip": false,
|
384 |
+
"single_word": false,
|
385 |
+
"special": false
|
386 |
+
},
|
387 |
+
"50300": {
|
388 |
+
"content": "[unused15]",
|
389 |
+
"lstrip": false,
|
390 |
+
"normalized": true,
|
391 |
+
"rstrip": false,
|
392 |
+
"single_word": false,
|
393 |
+
"special": false
|
394 |
+
},
|
395 |
+
"50301": {
|
396 |
+
"content": "[unused16]",
|
397 |
+
"lstrip": false,
|
398 |
+
"normalized": true,
|
399 |
+
"rstrip": false,
|
400 |
+
"single_word": false,
|
401 |
+
"special": false
|
402 |
+
},
|
403 |
+
"50302": {
|
404 |
+
"content": "[unused17]",
|
405 |
+
"lstrip": false,
|
406 |
+
"normalized": true,
|
407 |
+
"rstrip": false,
|
408 |
+
"single_word": false,
|
409 |
+
"special": false
|
410 |
+
},
|
411 |
+
"50303": {
|
412 |
+
"content": "[unused18]",
|
413 |
+
"lstrip": false,
|
414 |
+
"normalized": true,
|
415 |
+
"rstrip": false,
|
416 |
+
"single_word": false,
|
417 |
+
"special": false
|
418 |
+
},
|
419 |
+
"50304": {
|
420 |
+
"content": "[unused19]",
|
421 |
+
"lstrip": false,
|
422 |
+
"normalized": true,
|
423 |
+
"rstrip": false,
|
424 |
+
"single_word": false,
|
425 |
+
"special": false
|
426 |
+
},
|
427 |
+
"50305": {
|
428 |
+
"content": "[unused20]",
|
429 |
+
"lstrip": false,
|
430 |
+
"normalized": true,
|
431 |
+
"rstrip": false,
|
432 |
+
"single_word": false,
|
433 |
+
"special": false
|
434 |
+
},
|
435 |
+
"50306": {
|
436 |
+
"content": "[unused21]",
|
437 |
+
"lstrip": false,
|
438 |
+
"normalized": true,
|
439 |
+
"rstrip": false,
|
440 |
+
"single_word": false,
|
441 |
+
"special": false
|
442 |
+
},
|
443 |
+
"50307": {
|
444 |
+
"content": "[unused22]",
|
445 |
+
"lstrip": false,
|
446 |
+
"normalized": true,
|
447 |
+
"rstrip": false,
|
448 |
+
"single_word": false,
|
449 |
+
"special": false
|
450 |
+
},
|
451 |
+
"50308": {
|
452 |
+
"content": "[unused23]",
|
453 |
+
"lstrip": false,
|
454 |
+
"normalized": true,
|
455 |
+
"rstrip": false,
|
456 |
+
"single_word": false,
|
457 |
+
"special": false
|
458 |
+
},
|
459 |
+
"50309": {
|
460 |
+
"content": "[unused24]",
|
461 |
+
"lstrip": false,
|
462 |
+
"normalized": true,
|
463 |
+
"rstrip": false,
|
464 |
+
"single_word": false,
|
465 |
+
"special": false
|
466 |
+
},
|
467 |
+
"50310": {
|
468 |
+
"content": "[unused25]",
|
469 |
+
"lstrip": false,
|
470 |
+
"normalized": true,
|
471 |
+
"rstrip": false,
|
472 |
+
"single_word": false,
|
473 |
+
"special": false
|
474 |
+
},
|
475 |
+
"50311": {
|
476 |
+
"content": "[unused26]",
|
477 |
+
"lstrip": false,
|
478 |
+
"normalized": true,
|
479 |
+
"rstrip": false,
|
480 |
+
"single_word": false,
|
481 |
+
"special": false
|
482 |
+
},
|
483 |
+
"50312": {
|
484 |
+
"content": "[unused27]",
|
485 |
+
"lstrip": false,
|
486 |
+
"normalized": true,
|
487 |
+
"rstrip": false,
|
488 |
+
"single_word": false,
|
489 |
+
"special": false
|
490 |
+
},
|
491 |
+
"50313": {
|
492 |
+
"content": "[unused28]",
|
493 |
+
"lstrip": false,
|
494 |
+
"normalized": true,
|
495 |
+
"rstrip": false,
|
496 |
+
"single_word": false,
|
497 |
+
"special": false
|
498 |
+
},
|
499 |
+
"50314": {
|
500 |
+
"content": "[unused29]",
|
501 |
+
"lstrip": false,
|
502 |
+
"normalized": true,
|
503 |
+
"rstrip": false,
|
504 |
+
"single_word": false,
|
505 |
+
"special": false
|
506 |
+
},
|
507 |
+
"50315": {
|
508 |
+
"content": "[unused30]",
|
509 |
+
"lstrip": false,
|
510 |
+
"normalized": true,
|
511 |
+
"rstrip": false,
|
512 |
+
"single_word": false,
|
513 |
+
"special": false
|
514 |
+
},
|
515 |
+
"50316": {
|
516 |
+
"content": "[unused31]",
|
517 |
+
"lstrip": false,
|
518 |
+
"normalized": true,
|
519 |
+
"rstrip": false,
|
520 |
+
"single_word": false,
|
521 |
+
"special": false
|
522 |
+
},
|
523 |
+
"50317": {
|
524 |
+
"content": "[unused32]",
|
525 |
+
"lstrip": false,
|
526 |
+
"normalized": true,
|
527 |
+
"rstrip": false,
|
528 |
+
"single_word": false,
|
529 |
+
"special": false
|
530 |
+
},
|
531 |
+
"50318": {
|
532 |
+
"content": "[unused33]",
|
533 |
+
"lstrip": false,
|
534 |
+
"normalized": true,
|
535 |
+
"rstrip": false,
|
536 |
+
"single_word": false,
|
537 |
+
"special": false
|
538 |
+
},
|
539 |
+
"50319": {
|
540 |
+
"content": "[unused34]",
|
541 |
+
"lstrip": false,
|
542 |
+
"normalized": true,
|
543 |
+
"rstrip": false,
|
544 |
+
"single_word": false,
|
545 |
+
"special": false
|
546 |
+
},
|
547 |
+
"50320": {
|
548 |
+
"content": "[unused35]",
|
549 |
+
"lstrip": false,
|
550 |
+
"normalized": true,
|
551 |
+
"rstrip": false,
|
552 |
+
"single_word": false,
|
553 |
+
"special": false
|
554 |
+
},
|
555 |
+
"50321": {
|
556 |
+
"content": "[unused36]",
|
557 |
+
"lstrip": false,
|
558 |
+
"normalized": true,
|
559 |
+
"rstrip": false,
|
560 |
+
"single_word": false,
|
561 |
+
"special": false
|
562 |
+
},
|
563 |
+
"50322": {
|
564 |
+
"content": "[unused37]",
|
565 |
+
"lstrip": false,
|
566 |
+
"normalized": true,
|
567 |
+
"rstrip": false,
|
568 |
+
"single_word": false,
|
569 |
+
"special": false
|
570 |
+
},
|
571 |
+
"50323": {
|
572 |
+
"content": "[unused38]",
|
573 |
+
"lstrip": false,
|
574 |
+
"normalized": true,
|
575 |
+
"rstrip": false,
|
576 |
+
"single_word": false,
|
577 |
+
"special": false
|
578 |
+
},
|
579 |
+
"50324": {
|
580 |
+
"content": "[unused39]",
|
581 |
+
"lstrip": false,
|
582 |
+
"normalized": true,
|
583 |
+
"rstrip": false,
|
584 |
+
"single_word": false,
|
585 |
+
"special": false
|
586 |
+
},
|
587 |
+
"50325": {
|
588 |
+
"content": "[unused40]",
|
589 |
+
"lstrip": false,
|
590 |
+
"normalized": true,
|
591 |
+
"rstrip": false,
|
592 |
+
"single_word": false,
|
593 |
+
"special": false
|
594 |
+
},
|
595 |
+
"50326": {
|
596 |
+
"content": "[unused41]",
|
597 |
+
"lstrip": false,
|
598 |
+
"normalized": true,
|
599 |
+
"rstrip": false,
|
600 |
+
"single_word": false,
|
601 |
+
"special": false
|
602 |
+
},
|
603 |
+
"50327": {
|
604 |
+
"content": "[unused42]",
|
605 |
+
"lstrip": false,
|
606 |
+
"normalized": true,
|
607 |
+
"rstrip": false,
|
608 |
+
"single_word": false,
|
609 |
+
"special": false
|
610 |
+
},
|
611 |
+
"50328": {
|
612 |
+
"content": "[unused43]",
|
613 |
+
"lstrip": false,
|
614 |
+
"normalized": true,
|
615 |
+
"rstrip": false,
|
616 |
+
"single_word": false,
|
617 |
+
"special": false
|
618 |
+
},
|
619 |
+
"50329": {
|
620 |
+
"content": "[unused44]",
|
621 |
+
"lstrip": false,
|
622 |
+
"normalized": true,
|
623 |
+
"rstrip": false,
|
624 |
+
"single_word": false,
|
625 |
+
"special": false
|
626 |
+
},
|
627 |
+
"50330": {
|
628 |
+
"content": "[unused45]",
|
629 |
+
"lstrip": false,
|
630 |
+
"normalized": true,
|
631 |
+
"rstrip": false,
|
632 |
+
"single_word": false,
|
633 |
+
"special": false
|
634 |
+
},
|
635 |
+
"50331": {
|
636 |
+
"content": "[unused46]",
|
637 |
+
"lstrip": false,
|
638 |
+
"normalized": true,
|
639 |
+
"rstrip": false,
|
640 |
+
"single_word": false,
|
641 |
+
"special": false
|
642 |
+
},
|
643 |
+
"50332": {
|
644 |
+
"content": "[unused47]",
|
645 |
+
"lstrip": false,
|
646 |
+
"normalized": true,
|
647 |
+
"rstrip": false,
|
648 |
+
"single_word": false,
|
649 |
+
"special": false
|
650 |
+
},
|
651 |
+
"50333": {
|
652 |
+
"content": "[unused48]",
|
653 |
+
"lstrip": false,
|
654 |
+
"normalized": true,
|
655 |
+
"rstrip": false,
|
656 |
+
"single_word": false,
|
657 |
+
"special": false
|
658 |
+
},
|
659 |
+
"50334": {
|
660 |
+
"content": "[unused49]",
|
661 |
+
"lstrip": false,
|
662 |
+
"normalized": true,
|
663 |
+
"rstrip": false,
|
664 |
+
"single_word": false,
|
665 |
+
"special": false
|
666 |
+
},
|
667 |
+
"50335": {
|
668 |
+
"content": "[unused50]",
|
669 |
+
"lstrip": false,
|
670 |
+
"normalized": true,
|
671 |
+
"rstrip": false,
|
672 |
+
"single_word": false,
|
673 |
+
"special": false
|
674 |
+
},
|
675 |
+
"50336": {
|
676 |
+
"content": "[unused51]",
|
677 |
+
"lstrip": false,
|
678 |
+
"normalized": true,
|
679 |
+
"rstrip": false,
|
680 |
+
"single_word": false,
|
681 |
+
"special": false
|
682 |
+
},
|
683 |
+
"50337": {
|
684 |
+
"content": "[unused52]",
|
685 |
+
"lstrip": false,
|
686 |
+
"normalized": true,
|
687 |
+
"rstrip": false,
|
688 |
+
"single_word": false,
|
689 |
+
"special": false
|
690 |
+
},
|
691 |
+
"50338": {
|
692 |
+
"content": "[unused53]",
|
693 |
+
"lstrip": false,
|
694 |
+
"normalized": true,
|
695 |
+
"rstrip": false,
|
696 |
+
"single_word": false,
|
697 |
+
"special": false
|
698 |
+
},
|
699 |
+
"50339": {
|
700 |
+
"content": "[unused54]",
|
701 |
+
"lstrip": false,
|
702 |
+
"normalized": true,
|
703 |
+
"rstrip": false,
|
704 |
+
"single_word": false,
|
705 |
+
"special": false
|
706 |
+
},
|
707 |
+
"50340": {
|
708 |
+
"content": "[unused55]",
|
709 |
+
"lstrip": false,
|
710 |
+
"normalized": true,
|
711 |
+
"rstrip": false,
|
712 |
+
"single_word": false,
|
713 |
+
"special": false
|
714 |
+
},
|
715 |
+
"50341": {
|
716 |
+
"content": "[unused56]",
|
717 |
+
"lstrip": false,
|
718 |
+
"normalized": true,
|
719 |
+
"rstrip": false,
|
720 |
+
"single_word": false,
|
721 |
+
"special": false
|
722 |
+
},
|
723 |
+
"50342": {
|
724 |
+
"content": "[unused57]",
|
725 |
+
"lstrip": false,
|
726 |
+
"normalized": true,
|
727 |
+
"rstrip": false,
|
728 |
+
"single_word": false,
|
729 |
+
"special": false
|
730 |
+
},
|
731 |
+
"50343": {
|
732 |
+
"content": "[unused58]",
|
733 |
+
"lstrip": false,
|
734 |
+
"normalized": true,
|
735 |
+
"rstrip": false,
|
736 |
+
"single_word": false,
|
737 |
+
"special": false
|
738 |
+
},
|
739 |
+
"50344": {
|
740 |
+
"content": "[unused59]",
|
741 |
+
"lstrip": false,
|
742 |
+
"normalized": true,
|
743 |
+
"rstrip": false,
|
744 |
+
"single_word": false,
|
745 |
+
"special": false
|
746 |
+
},
|
747 |
+
"50345": {
|
748 |
+
"content": "[unused60]",
|
749 |
+
"lstrip": false,
|
750 |
+
"normalized": true,
|
751 |
+
"rstrip": false,
|
752 |
+
"single_word": false,
|
753 |
+
"special": false
|
754 |
+
},
|
755 |
+
"50346": {
|
756 |
+
"content": "[unused61]",
|
757 |
+
"lstrip": false,
|
758 |
+
"normalized": true,
|
759 |
+
"rstrip": false,
|
760 |
+
"single_word": false,
|
761 |
+
"special": false
|
762 |
+
},
|
763 |
+
"50347": {
|
764 |
+
"content": "[unused62]",
|
765 |
+
"lstrip": false,
|
766 |
+
"normalized": true,
|
767 |
+
"rstrip": false,
|
768 |
+
"single_word": false,
|
769 |
+
"special": false
|
770 |
+
},
|
771 |
+
"50348": {
|
772 |
+
"content": "[unused63]",
|
773 |
+
"lstrip": false,
|
774 |
+
"normalized": true,
|
775 |
+
"rstrip": false,
|
776 |
+
"single_word": false,
|
777 |
+
"special": false
|
778 |
+
},
|
779 |
+
"50349": {
|
780 |
+
"content": "[unused64]",
|
781 |
+
"lstrip": false,
|
782 |
+
"normalized": true,
|
783 |
+
"rstrip": false,
|
784 |
+
"single_word": false,
|
785 |
+
"special": false
|
786 |
+
},
|
787 |
+
"50350": {
|
788 |
+
"content": "[unused65]",
|
789 |
+
"lstrip": false,
|
790 |
+
"normalized": true,
|
791 |
+
"rstrip": false,
|
792 |
+
"single_word": false,
|
793 |
+
"special": false
|
794 |
+
},
|
795 |
+
"50351": {
|
796 |
+
"content": "[unused66]",
|
797 |
+
"lstrip": false,
|
798 |
+
"normalized": true,
|
799 |
+
"rstrip": false,
|
800 |
+
"single_word": false,
|
801 |
+
"special": false
|
802 |
+
},
|
803 |
+
"50352": {
|
804 |
+
"content": "[unused67]",
|
805 |
+
"lstrip": false,
|
806 |
+
"normalized": true,
|
807 |
+
"rstrip": false,
|
808 |
+
"single_word": false,
|
809 |
+
"special": false
|
810 |
+
},
|
811 |
+
"50353": {
|
812 |
+
"content": "[unused68]",
|
813 |
+
"lstrip": false,
|
814 |
+
"normalized": true,
|
815 |
+
"rstrip": false,
|
816 |
+
"single_word": false,
|
817 |
+
"special": false
|
818 |
+
},
|
819 |
+
"50354": {
|
820 |
+
"content": "[unused69]",
|
821 |
+
"lstrip": false,
|
822 |
+
"normalized": true,
|
823 |
+
"rstrip": false,
|
824 |
+
"single_word": false,
|
825 |
+
"special": false
|
826 |
+
},
|
827 |
+
"50355": {
|
828 |
+
"content": "[unused70]",
|
829 |
+
"lstrip": false,
|
830 |
+
"normalized": true,
|
831 |
+
"rstrip": false,
|
832 |
+
"single_word": false,
|
833 |
+
"special": false
|
834 |
+
},
|
835 |
+
"50356": {
|
836 |
+
"content": "[unused71]",
|
837 |
+
"lstrip": false,
|
838 |
+
"normalized": true,
|
839 |
+
"rstrip": false,
|
840 |
+
"single_word": false,
|
841 |
+
"special": false
|
842 |
+
},
|
843 |
+
"50357": {
|
844 |
+
"content": "[unused72]",
|
845 |
+
"lstrip": false,
|
846 |
+
"normalized": true,
|
847 |
+
"rstrip": false,
|
848 |
+
"single_word": false,
|
849 |
+
"special": false
|
850 |
+
},
|
851 |
+
"50358": {
|
852 |
+
"content": "[unused73]",
|
853 |
+
"lstrip": false,
|
854 |
+
"normalized": true,
|
855 |
+
"rstrip": false,
|
856 |
+
"single_word": false,
|
857 |
+
"special": false
|
858 |
+
},
|
859 |
+
"50359": {
|
860 |
+
"content": "[unused74]",
|
861 |
+
"lstrip": false,
|
862 |
+
"normalized": true,
|
863 |
+
"rstrip": false,
|
864 |
+
"single_word": false,
|
865 |
+
"special": false
|
866 |
+
},
|
867 |
+
"50360": {
|
868 |
+
"content": "[unused75]",
|
869 |
+
"lstrip": false,
|
870 |
+
"normalized": true,
|
871 |
+
"rstrip": false,
|
872 |
+
"single_word": false,
|
873 |
+
"special": false
|
874 |
+
},
|
875 |
+
"50361": {
|
876 |
+
"content": "[unused76]",
|
877 |
+
"lstrip": false,
|
878 |
+
"normalized": true,
|
879 |
+
"rstrip": false,
|
880 |
+
"single_word": false,
|
881 |
+
"special": false
|
882 |
+
},
|
883 |
+
"50362": {
|
884 |
+
"content": "[unused77]",
|
885 |
+
"lstrip": false,
|
886 |
+
"normalized": true,
|
887 |
+
"rstrip": false,
|
888 |
+
"single_word": false,
|
889 |
+
"special": false
|
890 |
+
},
|
891 |
+
"50363": {
|
892 |
+
"content": "[unused78]",
|
893 |
+
"lstrip": false,
|
894 |
+
"normalized": true,
|
895 |
+
"rstrip": false,
|
896 |
+
"single_word": false,
|
897 |
+
"special": false
|
898 |
+
},
|
899 |
+
"50364": {
|
900 |
+
"content": "[unused79]",
|
901 |
+
"lstrip": false,
|
902 |
+
"normalized": true,
|
903 |
+
"rstrip": false,
|
904 |
+
"single_word": false,
|
905 |
+
"special": false
|
906 |
+
},
|
907 |
+
"50365": {
|
908 |
+
"content": "[unused80]",
|
909 |
+
"lstrip": false,
|
910 |
+
"normalized": true,
|
911 |
+
"rstrip": false,
|
912 |
+
"single_word": false,
|
913 |
+
"special": false
|
914 |
+
},
|
915 |
+
"50366": {
|
916 |
+
"content": "[unused81]",
|
917 |
+
"lstrip": false,
|
918 |
+
"normalized": true,
|
919 |
+
"rstrip": false,
|
920 |
+
"single_word": false,
|
921 |
+
"special": false
|
922 |
+
},
|
923 |
+
"50367": {
|
924 |
+
"content": "[unused82]",
|
925 |
+
"lstrip": false,
|
926 |
+
"normalized": true,
|
927 |
+
"rstrip": false,
|
928 |
+
"single_word": false,
|
929 |
+
"special": false
|
930 |
+
}
|
931 |
+
},
|
932 |
+
"clean_up_tokenization_spaces": true,
|
933 |
+
"cls_token": "[CLS]",
|
934 |
+
"extra_special_tokens": {},
|
935 |
+
"mask_token": "[MASK]",
|
936 |
+
"model_input_names": [
|
937 |
+
"input_ids",
|
938 |
+
"attention_mask"
|
939 |
+
],
|
940 |
+
"model_max_length": 8192,
|
941 |
+
"pad_token": "[PAD]",
|
942 |
+
"sep_token": "[SEP]",
|
943 |
+
"tokenizer_class": "PreTrainedTokenizerFast",
|
944 |
+
"unk_token": "[UNK]"
|
945 |
+
}
|