Add new SentenceTransformer model.
Browse files
- 1_Pooling/config.json +10 -0
- README.md +814 -0
- config.json +25 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +14 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +65 -0
- vocab.txt +0 -0
1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 512,
|
3 |
+
"pooling_mode_cls_token": false,
|
4 |
+
"pooling_mode_mean_tokens": true,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
README.md
ADDED
@@ -0,0 +1,814 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:33870508
|
8 |
+
- loss:MultipleNegativesRankingLoss
|
9 |
+
widget:
|
10 |
+
- source_sentence: 'Wire-Free Targeted Axillary Dissection: A Pooled Analysis of 1300+
|
11 |
+
Cases Post-Neoadjuvant Systemic Therapy in Node-Positive Early Breast Cancer.'
|
12 |
+
sentences:
|
13 |
+
- Transdiagnostic behavior therapy.
|
14 |
+
- Recent advances in neoadjuvant systemic therapy between SLNB and MLNB was demonstrated.
|
15 |
+
Notably, 49 days of cases, respectively. MLNB inclusion in axillary staging post-NST
|
16 |
+
for initially node-positive patients is crucial. The radiation-free Savi Scout,
|
17 |
+
with its minimal MRI artefacts, is the preferred technology for TAD.
|
18 |
+
- Delirium affects approximately 20% to 25% of patients undergoing cardiac surgery
|
19 |
+
and is particularly common in older adults. This article reviews the etiology
|
20 |
+
and risk factors for delirium associated with cardiac surgery in older adults.
|
21 |
+
Delirium screening, prevention, and treatment strategies, including both pharmacological
|
22 |
+
and nonpharmacological therapies, are presented. Interventions appropriate in
|
23 |
+
both the intensive care unit and.
|
24 |
+
- source_sentence: Experimental studies on the geometrical characteristics determining
|
25 |
+
the system behavior of surface tension autooscillations.
|
26 |
+
sentences:
|
27 |
+
- Autooscillation of the surface tension is a phenomenon related to Marangoni instability
|
28 |
+
periodically arising and fading by dissolution of a surfactant droplet under a
|
29 |
+
water-air interface. A detailed experimental investigation was performed to clear
|
30 |
+
up the influence of the system geometry on development and characteristics of
|
31 |
+
autooscillations. It was found that the aspect ratio is an additional dimensionless
|
32 |
+
parameter that determines the system behavior equally to the Marangoni number.
|
33 |
+
The influence of the cell diameter, capillary immersion depth, and droplet radius
|
34 |
+
on the autooscillation period and amplitude was studied as well.
|
35 |
+
- Genome-wide methylation profiling is used in breast cancer (BC) studies, because
|
36 |
+
DNA methylation is a crucial epigenetic regulator of gene expression, involved
|
37 |
+
in many diseases including BC. We investigated genome-wide methylation profiles
|
38 |
+
in both canine mammary tumor (CMT) tissues and peripheral blood mononuclear cells
|
39 |
+
(PBMCs) using reduced representation bisulfite sequencing (RRBS) and found unique
|
40 |
+
CMT-enriched methylation signatures. A total of 2.2–4.2 million cytosine–phosphate–guanine
|
41 |
+
(CpG) sites were analyzed in both CMT tissues and PBMCs, which included 40,000
|
42 |
+
and 28,000 differentially methylated regions (DMRs) associated with 341 and 247
|
43 |
+
promoters of differentially methylated genes (DMGs) in CMT tissues and PBMCs,
|
44 |
+
respectively. Genes related to apoptosis and ion transmembrane transport were
|
45 |
+
hypermethylated, but cell proliferation and oncogene were hypomethylated in tumor
|
46 |
+
tissues. Gene ontology analysis using DMGs in PBMCs revealed significant methylation
|
47 |
+
changes in the subset of immune cells and host defense system-related genes, especially
|
48 |
+
chemokine signaling pathway-related genes. Moreover, a number of CMT tissue-enriched
|
49 |
+
DMRs were identified from the promoter regions of various microRNAs (miRNAs),
|
50 |
+
including cfa-mir-96 and cfa-mir-149, which were reported as cancer-associated
|
51 |
+
miRNAs in humans. We also identified novel miRNAs associated with CMT which can
|
52 |
+
be candidates for new miRNAs associated with human BC. This study may provide
|
53 |
+
new insight for a better understanding of aberrant methylation associated with
|
54 |
+
both human BC and CMT, as well as possible targets for methylation-based BC diagnostic
|
55 |
+
markers.
|
56 |
+
- Urine estrogens were measured in 46 women students, ages 15-18, at a middle-class
|
57 |
+
high school in Athens and in 40 women of the same age residing at one of three
|
58 |
+
orphanages in the same city. The lower socioeconomic status (SES) of the latter
|
59 |
+
group was documented by their lower mean height (by 5.2 cm) and weight (by 5.3
|
60 |
+
kg) relative to the high school students. Both in follicular and luteal phases
|
61 |
+
of the menstrual cycle, the women with lower SES had 50% higher estriol ratios
|
62 |
+
(ratio of the concentration of estriol to the sum of the concentrations of estrone
|
63 |
+
and estradiol). In luteal specimens the concentration of all three major estrogens
|
64 |
+
was higher in the group with low SES than in the women in the other group, but
|
65 |
+
the concentration of estriol was most increased. There was also an indication
|
66 |
+
of less frequent anovular cycles among the women with low SES. These findings
|
67 |
+
are consistent with hypotheses linking either the estriol ratio or the frequency
|
68 |
+
of anovular cycles to breast cancer risk.
|
69 |
+
- source_sentence: Iatrogenic superior vena cava syndrome treated with streptokinase.
|
70 |
+
sentences:
|
71 |
+
- The literature tells us that reflection offers a means to evaluate practice and
|
72 |
+
to identify learning from our practice experiences. The following description
|
73 |
+
of a practice incident will be discussed loosely in the light of Rolfe's 'Model
|
74 |
+
of Nursing Praxis' as a means of exploring the theoretical exercise of 'reflection'
|
75 |
+
within a proposed theoretical framework. It is hoped that the exercise will help
|
76 |
+
to achieve some of the suggested positive endpoints of reflection, and provide
|
77 |
+
insight and learning on an incident that was particularly powerful on both a personal
|
78 |
+
and a professional level.
|
79 |
+
- 'BACKGROUND: This study reported height prediction and longitudinal growth changes
|
80 |
+
in Chinese pediatric patients with acute myeloid leukemia (AML) during and after
|
81 |
+
treatment and their associations with outcomes. METHODS: Changes in 88 children
|
82 |
+
with AML in percentages according to the growth percentile curve for Chinese boys/girls
|
83 |
+
aged 2-18/0-2 years for body mass index (BMI), height, and weight from the time
|
84 |
+
of diagnosis to 2 years off therapy were evaluated. The outcomes of AML were compared
|
85 |
+
among patients with different BMI levels. RESULTS: The proportion of underweight
|
86 |
+
children (weight < 5th percentile) increased significantly from the initial diagnosis
|
87 |
+
to the end of consolidation treatment. The proportion of patients with low BMI
|
88 |
+
(BMI < 5th percentile) was highest (23.08%) during the consolidation phase, and
|
89 |
+
no children were underweight, but 20% were overweight (BMI > 75th percentile)
|
90 |
+
after 2 years of drug withdrawal. Unhealthy BMI at the initial diagnosis and during
|
91 |
+
intensive chemotherapy leads to poorer outcomes. For height, all patients were
|
92 |
+
in the range of genetic height predicted based on their parents'' height at final
|
93 |
+
follow-up. CONCLUSIONS: Physicians should pay more attention to the changes in
|
94 |
+
height and weight of children with AML at these crucial treatment stages and intervene
|
95 |
+
in time.'
|
96 |
+
- The development of an iatrogenic superior vena cava syndrome secondary to a thrombosis
|
97 |
+
from an indwelling Hickman catheter in a patient with ovarian carcinoma is presented.
|
98 |
+
The patient was treated with a combination of streptokinase and heparin with successful
|
99 |
+
and dramatic results. Streptokinase appears to be highly effective in the treatment
|
100 |
+
of iatrogenic superior vena cava syndrome from Hickman catheters. It appears that
|
101 |
+
the Hickman catheter may be safely left in situ post-treatment.
|
102 |
+
- source_sentence: Cesarean delivery in a parturient with syringomyelia and worsening
|
103 |
+
neurological symptoms.
|
104 |
+
sentences:
|
105 |
+
- A parturient presented at 35 weeks' gestation with worsening neurological symptoms
|
106 |
+
caused by syringomyelia. She underwent urgent cesarean delivery. The etiology
|
107 |
+
and anesthetic considerations for an obstetrical patient with syringomyelia are
|
108 |
+
discussed.
|
109 |
+
- Attachment of enterotoxigenic Escherichia coli to the human gut is considered
|
110 |
+
an important early step in infection that leads to diarrhea. This attachment is
|
111 |
+
mediated by pili, which belong to a limited number of serologically distinguishable
|
112 |
+
types. Many of these pili require the product of rns, or a closely related gene,
|
113 |
+
for their expression. We have located the major promoter for rns and found that
|
114 |
+
although its sequence diverges significantly from a sigma-70 promoter consensus
|
115 |
+
sequence, it is very strong. Transcription of rns is negatively regulated both
|
116 |
+
at a region upstream of this promoter and at a region internal to the rns open
|
117 |
+
reading frame. In addition, rns positively regulates its own transcription, probably
|
118 |
+
by counteracting these two negative effects.
|
119 |
+
- 'Purpose: Research exploring how places shape and interact with the lives of aging
|
120 |
+
adults must be grounded in the places where aging adults live and participate.
|
121 |
+
Combined participatory geospatial and qualitative methods have the potential to
|
122 |
+
illuminate the complex processes enacted between person and place to create much-needed
|
123 |
+
knowledge in this area. The purpose of this scoping review was to identify methods
|
124 |
+
that can be used to study person-place relationships among aging adults and their
|
125 |
+
neighborhoods by determining the extent and nature of research with aging adults
|
126 |
+
that combines qualitative methods with participatory geospatial methods. Design
|
127 |
+
and Methods: A systematic search of nine databases identified 1,965 articles published
|
128 |
+
from 1995 to late 2015. We extracted data and assessed whether the geospatial
|
129 |
+
and qualitative methods were supported by a specified methodology, the methods
|
130 |
+
of data analysis, and the extent of integration of geospatial and qualitative
|
131 |
+
methods. Results: Fifteen studies were included and used the photovoice method,
|
132 |
+
global positioning system tracking plus interview, or go-along interviews. Most
|
133 |
+
included articles provided sufficient detail about data collection methods, yet
|
134 |
+
limited detail about methodologies supporting the study designs and/or data analysis.
|
135 |
+
Implications: Approaches that combine participatory geospatial and qualitative
|
136 |
+
methods are beginning to emerge in the aging literature. By more explicitly grounding
|
137 |
+
studies in a methodology, better integrating different types of data during analysis,
|
138 |
+
and reflecting on methods as they are applied, these methods can be further developed
|
139 |
+
and utilized to provide crucial place-based knowledge that can support aging adults''
|
140 |
+
health, well-being, engagement, and participation.'
|
141 |
+
- source_sentence: Development of an in vitro regeneration system from immature inflorescences
|
142 |
+
and CRISPR/Cas9-mediated gene editing in sudangrass.
|
143 |
+
sentences:
|
144 |
+
- HIV envelope protein (Env) is the sole target of broadly neutralizing antibodies
|
145 |
+
(BNAbs) that are capable of neutralizing diverse strains of HIV. While BNAbs develop
|
146 |
+
spontaneously in a subset of HIV-infected patients, efforts to design an envelope
|
147 |
+
protein-based immunogen to elicit broadly neutralizing antibody responses have
|
148 |
+
so far been unsuccessful. It is hypothesized that a primary barrier to eliciting
|
149 |
+
BNAbs is the fact that HIV envelope proteins bind poorly to the germline-encoded
|
150 |
+
unmutated common ancestor (UCA) precursors to BNAbs. To identify variant forms
|
151 |
+
of Env with increased affinities for the UCA forms of BNAbs 4E10 and 10E8, which
|
152 |
+
target the Membrane Proximal External Region (MPER) of Env, libraries of randomly
|
153 |
+
mutated Env variants were expressed in a yeast surface display system and screened
|
154 |
+
using fluorescence activated cell sorting for cells displaying variants with enhanced
|
155 |
+
abilities to bind the UCA antibodies. Based on analyses of individual clones obtained
|
156 |
+
from the screen and on next-generation sequencing of sorted libraries, distinct
|
157 |
+
but partially overlapping sets of amino acid substitutions conferring enhanced
|
158 |
+
UCA antibody binding were identified. These were particularly enriched in substitutions
|
159 |
+
of arginine for highly conserved tryptophan residues. The UCA-binding variants
|
160 |
+
also generally exhibited enhanced binding to the mature forms of anti-MPER antibodies.
|
161 |
+
Mapping of the identified substitutions into available structures of Env suggest
|
162 |
+
that they may act by destabilizing both the initial pre-fusion conformation and
|
163 |
+
the six-helix bundle involved in fusion of the viral and cell membranes, as well
|
164 |
+
as providing new or expanded epitopes with increased accessibility for the UCA
|
165 |
+
antibodies.
|
166 |
+
- 'BACKGROUND: Sudangrass (Sorghum sudanense) is a major biomass producer for livestock
|
167 |
+
feed and biofuel in many countries. It has a wide range of adaptations for growing
|
168 |
+
on marginal lands under biotic and abiotic stresses. The immature inflorescence
|
169 |
+
is an explant with high embryogenic competence and is frequently used to regenerate
|
170 |
+
different sorghum cultivars. Caffeic acid O-methyl transferase (COMT) is a key
|
171 |
+
enzyme in the lignin biosynthesis pathway, which limits ruminant digestion of
|
172 |
+
forage cell walls and is a crucial barrier in the conversion of plant biomass
|
173 |
+
to bioethanol. Genome editing by CRISPR/Cas9-mediated mutagenesis without a transgenic
|
174 |
+
footprint will accelerate the improvement and facilitate regulatory approval and
|
175 |
+
commercialization of biotech crops. METHODS AND RESULTS: We report the overcome
|
176 |
+
of the recalcitrance in sudangrass transformation and regeneration in order to
|
177 |
+
use genome editing technique. Hence, an efficient regeneration system has been
|
178 |
+
established to induce somatic embryogenesis from the immature inflorescence of
|
179 |
+
two sudangrass cultivars on four MS-based media supplemented with different components.
|
180 |
+
Our results indicate an interaction between genotype and medium composition. The
|
181 |
+
combination of Giza-1 cultivar and M4 medium produces the maximum frequency of
|
182 |
+
embryogenic calli of 80% and subsequent regeneration efficiency of 22.6%. Precise
|
183 |
+
mutagenesis of the COMT gene is executed using the CRISPR/Cas9 system with the
|
184 |
+
potential to reduce lignin content and enhance forage and biomass quality in sudangrass.
|
185 |
+
CONCLUSION: A reliable regeneration and transformation system has been established
|
186 |
+
for sudangrass using immature inflorescence, and the CRISPR/Cas9 system has demonstrated
|
187 |
+
a promising technology for genome editing. The outcomes of this research will
|
188 |
+
pave the road for further improvement of various sorghum genotypes to meet the
|
189 |
+
global demand for food, feed, and biofuels, achieving sustainable development
|
190 |
+
goals (SDGs).'
|
191 |
+
- 'The synthesis of an extracellular matrix containing long (approximately mm in
|
192 |
+
length) collagen fibrils is fundamental to the normal morphogenesis of animal
|
193 |
+
tissues. In this study we have direct evidence that fibroblasts synthesise transient
|
194 |
+
early fibril intermediates (approximately 1 micrometer in length) that interact
|
195 |
+
by tip-to-tip fusion to generate long fibrils seen in older tissues. Examination
|
196 |
+
of early collagen fibrils from tendon showed that two types of early fibrils occur:
|
197 |
+
unipolar fibrils (with carboxyl (C) and amino (N) ends) and bipolar fibrils (with
|
198 |
+
two N-ends). End-to-end fusion requires the C-end of a unipolar fibril. Proteoglycans
|
199 |
+
coated the shafts of the fibrils but not the tips. In the absence of proteoglycans
|
200 |
+
the fibrils aggregated by side-to-side interactions. Therefore, proteoglycans
|
201 |
+
promote tip-to-tip fusion and inhibit side-to-side fusion. This distribution of
|
202 |
+
proteoglycan along the fibril required co-assembly of collagen and proteoglycan
|
203 |
+
prior to fibril assembly. The study showed that collagen fibrillogenesis is a
|
204 |
+
hierarchical process that depends on the unique structure of unipolar fibrils
|
205 |
+
and a novel function of proteoglycans.'
|
206 |
+
pipeline_tag: sentence-similarity
|
207 |
+
library_name: sentence-transformers
|
208 |
+
---
|
209 |
+
|
210 |
+
# SentenceTransformer
|
211 |
+
|
212 |
+
This is a [sentence-transformers](https://www.SBERT.net) model trained on the `parquet` dataset. It maps sentences and paragraphs to a 512-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
213 |
+
|
214 |
+
## Model Details
|
215 |
+
|
216 |
+
### Model Description
|
217 |
+
- **Model Type:** Sentence Transformer
|
218 |
+
<!-- - **Base model:** [Unknown](https://huggingface.co/unknown) -->
|
219 |
+
- **Maximum Sequence Length:** 512 tokens
|
220 |
+
- **Output Dimensionality:** 512 dimensions
|
221 |
+
- **Similarity Function:** Cosine Similarity
|
222 |
+
- **Training Dataset:**
|
223 |
+
- parquet
|
224 |
+
<!-- - **Language:** Unknown -->
|
225 |
+
<!-- - **License:** Unknown -->
|
226 |
+
|
227 |
+
### Model Sources
|
228 |
+
|
229 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
230 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
231 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
232 |
+
|
233 |
+
### Full Model Architecture
|
234 |
+
|
235 |
+
```
|
236 |
+
SentenceTransformer(
|
237 |
+
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
|
238 |
+
(1): Pooling({'word_embedding_dimension': 512, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
239 |
+
)
|
240 |
+
```
|
241 |
+
|
242 |
+
## Usage
|
243 |
+
|
244 |
+
### Direct Usage (Sentence Transformers)
|
245 |
+
|
246 |
+
First install the Sentence Transformers library:
|
247 |
+
|
248 |
+
```bash
|
249 |
+
pip install -U sentence-transformers
|
250 |
+
```
|
251 |
+
|
252 |
+
Then you can load this model and run inference.
|
253 |
+
```python
|
254 |
+
from sentence_transformers import SentenceTransformer
|
255 |
+
|
256 |
+
# Download from the 🤗 Hub
|
257 |
+
model = SentenceTransformer("pankajrajdeo/Bioformer-8L-UMLS-Pubmed_PMC-Random_TCE-Epoch-1")
|
258 |
+
# Run inference
|
259 |
+
sentences = [
|
260 |
+
'Development of an in vitro regeneration system from immature inflorescences and CRISPR/Cas9-mediated gene editing in sudangrass.',
|
261 |
+
'BACKGROUND: Sudangrass (Sorghum sudanense) is a major biomass producer for livestock feed and biofuel in many countries. It has a wide range of adaptations for growing on marginal lands under biotic and abiotic stresses. The immature inflorescence is an explant with high embryogenic competence and is frequently used to regenerate different sorghum cultivars. Caffeic acid O-methyl transferase (COMT) is a key enzyme in the lignin biosynthesis pathway, which limits ruminant digestion of forage cell walls and is a crucial barrier in the conversion of plant biomass to bioethanol. Genome editing by CRISPR/Cas9-mediated mutagenesis without a transgenic footprint will accelerate the improvement and facilitate regulatory approval and commercialization of biotech crops. METHODS AND RESULTS: We report the overcome of the recalcitrance in sudangrass transformation and regeneration in order to use genome editing technique. Hence, an efficient regeneration system has been established to induce somatic embryogenesis from the immature inflorescence of two sudangrass cultivars on four MS-based media supplemented with different components. Our results indicate an interaction between genotype and medium composition. The combination of Giza-1 cultivar and M4 medium produces the maximum frequency of embryogenic calli of 80% and subsequent regeneration efficiency of 22.6%. Precise mutagenesis of the COMT gene is executed using the CRISPR/Cas9 system with the potential to reduce lignin content and enhance forage and biomass quality in sudangrass. CONCLUSION: A reliable regeneration and transformation system has been established for sudangrass using immature inflorescence, and the CRISPR/Cas9 system has demonstrated a promising technology for genome editing. The outcomes of this research will pave the road for further improvement of various sorghum genotypes to meet the global demand for food, feed, and biofuels, achieving sustainable development goals (SDGs).',
|
262 |
+
'HIV envelope protein (Env) is the sole target of broadly neutralizing antibodies (BNAbs) that are capable of neutralizing diverse strains of HIV. While BNAbs develop spontaneously in a subset of HIV-infected patients, efforts to design an envelope protein-based immunogen to elicit broadly neutralizing antibody responses have so far been unsuccessful. It is hypothesized that a primary barrier to eliciting BNAbs is the fact that HIV envelope proteins bind poorly to the germline-encoded unmutated common ancestor (UCA) precursors to BNAbs. To identify variant forms of Env with increased affinities for the UCA forms of BNAbs 4E10 and 10E8, which target the Membrane Proximal External Region (MPER) of Env, libraries of randomly mutated Env variants were expressed in a yeast surface display system and screened using fluorescence activated cell sorting for cells displaying variants with enhanced abilities to bind the UCA antibodies. Based on analyses of individual clones obtained from the screen and on next-generation sequencing of sorted libraries, distinct but partially overlapping sets of amino acid substitutions conferring enhanced UCA antibody binding were identified. These were particularly enriched in substitutions of arginine for highly conserved tryptophan residues. The UCA-binding variants also generally exhibited enhanced binding to the mature forms of anti-MPER antibodies. Mapping of the identified substitutions into available structures of Env suggest that they may act by destabilizing both the initial pre-fusion conformation and the six-helix bundle involved in fusion of the viral and cell membranes, as well as providing new or expanded epitopes with increased accessibility for the UCA antibodies.',
|
263 |
+
]
|
264 |
+
embeddings = model.encode(sentences)
|
265 |
+
print(embeddings.shape)
|
266 |
+
# [3, 512]
|
267 |
+
|
268 |
+
# Get the similarity scores for the embeddings
|
269 |
+
similarities = model.similarity(embeddings, embeddings)
|
270 |
+
print(similarities.shape)
|
271 |
+
# [3, 3]
|
272 |
+
```
|
273 |
+
|
274 |
+
<!--
|
275 |
+
### Direct Usage (Transformers)
|
276 |
+
|
277 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
278 |
+
|
279 |
+
</details>
|
280 |
+
-->
|
281 |
+
|
282 |
+
<!--
|
283 |
+
### Downstream Usage (Sentence Transformers)
|
284 |
+
|
285 |
+
You can finetune this model on your own dataset.
|
286 |
+
|
287 |
+
<details><summary>Click to expand</summary>
|
288 |
+
|
289 |
+
</details>
|
290 |
+
-->
|
291 |
+
|
292 |
+
<!--
|
293 |
+
### Out-of-Scope Use
|
294 |
+
|
295 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
296 |
+
-->
|
297 |
+
|
298 |
+
<!--
|
299 |
+
## Bias, Risks and Limitations
|
300 |
+
|
301 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
302 |
+
-->
|
303 |
+
|
304 |
+
<!--
|
305 |
+
### Recommendations
|
306 |
+
|
307 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
308 |
+
-->
|
309 |
+
|
310 |
+
## Training Details
|
311 |
+
|
312 |
+
### Training Dataset
|
313 |
+
|
314 |
+
#### parquet
|
315 |
+
|
316 |
+
* Dataset: parquet
|
317 |
+
* Size: 33,870,508 training samples
|
318 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
319 |
+
* Approximate statistics based on the first 1000 samples:
|
320 |
+
| | anchor | positive |
|
321 |
+
|:--------|:----------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
|
322 |
+
| type | string | string |
|
323 |
+
| details | <ul><li>min: 3 tokens</li><li>mean: 22.56 tokens</li><li>max: 64 tokens</li></ul> | <ul><li>min: 12 tokens</li><li>mean: 250.53 tokens</li><li>max: 512 tokens</li></ul> |
|
324 |
+
* Samples:
|
325 |
+
| anchor | positive |
|
326 |
+
|:------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
327 |
+
| <code>Characteristics of the HIV/AIDS Epidemic among People Aged ≥ 50 Years in China during 2018-2021.</code> | <code>Objective: This study aimed to determine the current epidemiological status of PLWHA aged ≥ 50 years in China from 2018 to 2021. It also aimed to recommend targeted interventions for the prevention and treatment of HIV/AIDS in elderly patients. Methods: Data on newly reported cases of PLWHA, aged ≥ 50 years in China from 2018 to 2021, were collected using the CRIMS. Trend tests and spatial analyses were also conducted. Results: Between 2018 and 2021, 237,724 HIV/AIDS cases were reported among patients aged ≥ 50 years in China. The main transmission route was heterosexual transmission (91.24%). Commercial heterosexual transmission (CHC) was the primary mode of transmission among males, while non-marital non-CHC ([NMNCHC]; 60.59%) was the prevalent route in women. The proportion of patients with CHC decreased over time ( Z = 67.716, P < 0.01), while that of patients with NMNCHC increased ( Z = 153.05, P < 0.01). The sex ratio varied among the different modes of infection, and it peaked a...</code> |
|
328 |
+
| <code>Obstructive sleep apnea syndrome: A frequent and difficult-to-detect complication of radiotherapy for oropharyngeal cancers.</code> | <code>This pilot study reveals a higher prevalence of obstructive sleep apnea syndrome (OSAS) in patients treated for oropharyngeal squamous cell carcinoma with radiotherapy compared to the general population. OSAS indicators such as the Epworth Sleepiness Scale seem insufficient in the diagnostic approach to OSAS in this population and systematic screenings should be considered.</code> |
|
329 |
+
| <code>Two new JK silencing alleles identified by single molecule sequencing with 20-Kb long-reads.</code> | <code>BACKGROUND: The Kidd blood group gene SLC14A1 and JK*02 having c.499A>G, c.588A>G, and c.743C>A (p.Ala248Asp). The two JK alleles identified have not been previously described. Transfection and expression studies indicated that the CHO cells transfected with JK*02 having c.743C>A did not express the Jkb and Jk3 antigens. CONCLUSIONS: We identified new JK silencing alleles and their critical SNVs by single-molecule sequencing and the findings were confirmed by transfection and expression studies.</code> |
|
330 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
331 |
+
```json
|
332 |
+
{
|
333 |
+
"scale": 20.0,
|
334 |
+
"similarity_fct": "cos_sim"
|
335 |
+
}
|
336 |
+
```
|
337 |
+
|
338 |
+
### Evaluation Dataset
|
339 |
+
|
340 |
+
#### parquet
|
341 |
+
|
342 |
+
* Dataset: parquet
|
343 |
+
* Size: 33,870,508 evaluation samples
|
344 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
345 |
+
* Approximate statistics based on the first 1000 samples:
|
346 |
+
| | anchor | positive |
|
347 |
+
|:--------|:----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
|
348 |
+
| type | string | string |
|
349 |
+
| details | <ul><li>min: 3 tokens</li><li>mean: 22.47 tokens</li><li>max: 95 tokens</li></ul> | <ul><li>min: 7 tokens</li><li>mean: 251.6 tokens</li><li>max: 512 tokens</li></ul> |
|
350 |
+
* Samples:
|
351 |
+
| anchor | positive |
|
352 |
+
|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
353 |
+
| <code>Causes and Management of Endoscopic Retrograde Cholangiopancreatography-Related Perforation: A Retrospective Study.</code> | <code>BACKGROUND: Endoscopic retrograde cholangiopancreatography of ERCP-related perforation and conducted a retrospective review. RESULTS: Of the 15 patients, 6 were female and 9 were male, and the mean age was 77.1 years. According to Stapfer's classification, the 15 cases of ERCP-related perforation comprised 3 type I (duodenum), 3 type II (periampullary), 9 type III (distal bile duct or pancreatic duct), and no type IV cases. Fourteen of 15 (92.6%) were diagnosed during ERCP. The main cause of perforation was scope-induced damage, endoscopic sphincterotomy, and instrumentation penetration in type I, II, and III cases, respectively. Four patients with severe abdominal pain and extraluminal fluid collection underwent emergency surgery for repair and drainage. One type III patient with distal bile duct cancer underwent pancreaticoduodenectomy on day 6. Three type III patients with only retroperitoneal gas on computed tomography (CT) performed immediately after ERCP had no symptoms and neede...</code> |
|
354 |
+
| <code>Covariance among premating, post-copulatory and viability fitness components in Drosophila melanogaster and their influence on paternity measurement.</code> | <code>In polyandrous mating systems, male fitness depends on success in premating, post-copulatory and offspring viability episodes of selection. We tracked male success across all of these episodes simultaneously, using transgenic Drosophila melanogaster with ubiquitously expressed green fluorescent protein (that is GFP) in a series of competitive and noncompetitive matings. This approach permitted us to track paternity-specific viability over all life stages and to distinguish true competitive fertilization success from differential early offspring viability. Relationships between episodes of selection were generally not present when paternity was measured in eggs; however, positive correlations between sperm competitive success and offspring viability became significant when paternity was measured in adult offspring. Additionally, we found a significant male × female interaction on hatching success and a lack of repeatability of offspring viability across a focal male's matings, which may...</code> |
|
355 |
+
| <code>Strategic partnerships to improve surgical care in the Asia–Pacific region: proceedings</code> | <code>Emergency and essential surgery is a critical component of universal health coverage. Session three of the three-part virtual meeting series on Strategic Planning to Improve Surgical, Obstetric, Anaesthesia, and Trauma Care in the Asia–Pacific Region focused on strategic partnerships. During this session, a range of partner organisations, including intergovernmental organisations, professional associations, academic and research institutions, non-governmental organisations, and the private sector provided an update on their work in surgical system strengthening in the Asia–Pacific region. Partner organisations could provide technical and implementation support for National Surgical, Obstetric, and Anaesthesia Planning (NSOAP) in a number of areas, including workforce strengthening, capacity building, guideline development, monitoring and evaluation, and service delivery. Participants emphasised the importance of several forms of strategic collaboration: 1) collaboration across the spec...</code> |
|
356 |
+
* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
|
357 |
+
```json
|
358 |
+
{
|
359 |
+
"scale": 20.0,
|
360 |
+
"similarity_fct": "cos_sim"
|
361 |
+
}
|
362 |
+
```
|
363 |
+
|
364 |
+
### Training Hyperparameters
|
365 |
+
#### Non-Default Hyperparameters
|
366 |
+
|
367 |
+
- `eval_strategy`: steps
|
368 |
+
- `per_device_train_batch_size`: 128
|
369 |
+
- `learning_rate`: 2e-05
|
370 |
+
- `num_train_epochs`: 1
|
371 |
+
- `max_steps`: 251382
|
372 |
+
- `log_level`: info
|
373 |
+
- `fp16`: True
|
374 |
+
- `dataloader_num_workers`: 16
|
375 |
+
- `load_best_model_at_end`: True
|
376 |
+
- `resume_from_checkpoint`: True
|
377 |
+
|
378 |
+
#### All Hyperparameters
|
379 |
+
<details><summary>Click to expand</summary>
|
380 |
+
|
381 |
+
- `overwrite_output_dir`: False
|
382 |
+
- `do_predict`: False
|
383 |
+
- `eval_strategy`: steps
|
384 |
+
- `prediction_loss_only`: True
|
385 |
+
- `per_device_train_batch_size`: 128
|
386 |
+
- `per_device_eval_batch_size`: 8
|
387 |
+
- `per_gpu_train_batch_size`: None
|
388 |
+
- `per_gpu_eval_batch_size`: None
|
389 |
+
- `gradient_accumulation_steps`: 1
|
390 |
+
- `eval_accumulation_steps`: None
|
391 |
+
- `torch_empty_cache_steps`: None
|
392 |
+
- `learning_rate`: 2e-05
|
393 |
+
- `weight_decay`: 0.0
|
394 |
+
- `adam_beta1`: 0.9
|
395 |
+
- `adam_beta2`: 0.999
|
396 |
+
- `adam_epsilon`: 1e-08
|
397 |
+
- `max_grad_norm`: 1.0
|
398 |
+
- `num_train_epochs`: 1
|
399 |
+
- `max_steps`: 251382
|
400 |
+
- `lr_scheduler_type`: linear
|
401 |
+
- `lr_scheduler_kwargs`: {}
|
402 |
+
- `warmup_ratio`: 0.0
|
403 |
+
- `warmup_steps`: 0
|
404 |
+
- `log_level`: info
|
405 |
+
- `log_level_replica`: warning
|
406 |
+
- `log_on_each_node`: True
|
407 |
+
- `logging_nan_inf_filter`: True
|
408 |
+
- `save_safetensors`: True
|
409 |
+
- `save_on_each_node`: False
|
410 |
+
- `save_only_model`: False
|
411 |
+
- `restore_callback_states_from_checkpoint`: False
|
412 |
+
- `no_cuda`: False
|
413 |
+
- `use_cpu`: False
|
414 |
+
- `use_mps_device`: False
|
415 |
+
- `seed`: 42
|
416 |
+
- `data_seed`: None
|
417 |
+
- `jit_mode_eval`: False
|
418 |
+
- `use_ipex`: False
|
419 |
+
- `bf16`: False
|
420 |
+
- `fp16`: True
|
421 |
+
- `fp16_opt_level`: O1
|
422 |
+
- `half_precision_backend`: auto
|
423 |
+
- `bf16_full_eval`: False
|
424 |
+
- `fp16_full_eval`: False
|
425 |
+
- `tf32`: None
|
426 |
+
- `local_rank`: 0
|
427 |
+
- `ddp_backend`: None
|
428 |
+
- `tpu_num_cores`: None
|
429 |
+
- `tpu_metrics_debug`: False
|
430 |
+
- `debug`: []
|
431 |
+
- `dataloader_drop_last`: False
|
432 |
+
- `dataloader_num_workers`: 16
|
433 |
+
- `dataloader_prefetch_factor`: None
|
434 |
+
- `past_index`: -1
|
435 |
+
- `disable_tqdm`: False
|
436 |
+
- `remove_unused_columns`: True
|
437 |
+
- `label_names`: None
|
438 |
+
- `load_best_model_at_end`: True
|
439 |
+
- `ignore_data_skip`: False
|
440 |
+
- `fsdp`: []
|
441 |
+
- `fsdp_min_num_params`: 0
|
442 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
443 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
444 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
445 |
+
- `deepspeed`: None
|
446 |
+
- `label_smoothing_factor`: 0.0
|
447 |
+
- `optim`: adamw_torch
|
448 |
+
- `optim_args`: None
|
449 |
+
- `adafactor`: False
|
450 |
+
- `group_by_length`: False
|
451 |
+
- `length_column_name`: length
|
452 |
+
- `ddp_find_unused_parameters`: None
|
453 |
+
- `ddp_bucket_cap_mb`: None
|
454 |
+
- `ddp_broadcast_buffers`: False
|
455 |
+
- `dataloader_pin_memory`: True
|
456 |
+
- `dataloader_persistent_workers`: False
|
457 |
+
- `skip_memory_metrics`: True
|
458 |
+
- `use_legacy_prediction_loop`: False
|
459 |
+
- `push_to_hub`: False
|
460 |
+
- `resume_from_checkpoint`: True
|
461 |
+
- `hub_model_id`: None
|
462 |
+
- `hub_strategy`: every_save
|
463 |
+
- `hub_private_repo`: None
|
464 |
+
- `hub_always_push`: False
|
465 |
+
- `gradient_checkpointing`: False
|
466 |
+
- `gradient_checkpointing_kwargs`: None
|
467 |
+
- `include_inputs_for_metrics`: False
|
468 |
+
- `include_for_metrics`: []
|
469 |
+
- `eval_do_concat_batches`: True
|
470 |
+
- `fp16_backend`: auto
|
471 |
+
- `push_to_hub_model_id`: None
|
472 |
+
- `push_to_hub_organization`: None
|
473 |
+
- `mp_parameters`:
|
474 |
+
- `auto_find_batch_size`: False
|
475 |
+
- `full_determinism`: False
|
476 |
+
- `torchdynamo`: None
|
477 |
+
- `ray_scope`: last
|
478 |
+
- `ddp_timeout`: 1800
|
479 |
+
- `torch_compile`: False
|
480 |
+
- `torch_compile_backend`: None
|
481 |
+
- `torch_compile_mode`: None
|
482 |
+
- `dispatch_batches`: None
|
483 |
+
- `split_batches`: None
|
484 |
+
- `include_tokens_per_second`: False
|
485 |
+
- `include_num_input_tokens_seen`: False
|
486 |
+
- `neftune_noise_alpha`: None
|
487 |
+
- `optim_target_modules`: None
|
488 |
+
- `batch_eval_metrics`: False
|
489 |
+
- `eval_on_start`: False
|
490 |
+
- `use_liger_kernel`: False
|
491 |
+
- `eval_use_gather_object`: False
|
492 |
+
- `average_tokens_across_devices`: False
|
493 |
+
- `prompts`: None
|
494 |
+
- `batch_sampler`: batch_sampler
|
495 |
+
- `multi_dataset_batch_sampler`: proportional
|
496 |
+
|
497 |
+
</details>
|
498 |
+
|
499 |
+
### Training Logs
|
500 |
+
<details><summary>Click to expand</summary>
|
501 |
+
|
502 |
+
| Epoch | Step | Training Loss | Validation Loss |
|
503 |
+
|:------:|:------:|:-------------:|:---------------:|
|
504 |
+
| 0.0000 | 1 | 1.6269 | - |
|
505 |
+
| 0.0040 | 1000 | 0.2123 | - |
|
506 |
+
| 0.0080 | 2000 | 0.1191 | - |
|
507 |
+
| 0.0119 | 3000 | 0.0948 | - |
|
508 |
+
| 0.0159 | 4000 | 0.0824 | - |
|
509 |
+
| 0.0199 | 5000 | 0.0708 | - |
|
510 |
+
| 0.0239 | 6000 | 0.0665 | - |
|
511 |
+
| 0.0278 | 7000 | 0.0612 | - |
|
512 |
+
| 0.0318 | 8000 | 0.0578 | - |
|
513 |
+
| 0.0358 | 9000 | 0.0542 | - |
|
514 |
+
| 0.0398 | 10000 | 0.0528 | - |
|
515 |
+
| 0.0438 | 11000 | 0.0505 | - |
|
516 |
+
| 0.0477 | 12000 | 0.0461 | - |
|
517 |
+
| 0.0517 | 13000 | 0.0468 | - |
|
518 |
+
| 0.0557 | 14000 | 0.0442 | - |
|
519 |
+
| 0.0597 | 15000 | 0.0435 | - |
|
520 |
+
| 0.0636 | 16000 | 0.0414 | - |
|
521 |
+
| 0.0676 | 17000 | 0.0421 | - |
|
522 |
+
| 0.0716 | 18000 | 0.0399 | - |
|
523 |
+
| 0.0756 | 19000 | 0.0409 | - |
|
524 |
+
| 0.0796 | 20000 | 0.0393 | - |
|
525 |
+
| 0.0835 | 21000 | 0.0369 | - |
|
526 |
+
| 0.0875 | 22000 | 0.0349 | - |
|
527 |
+
| 0.0915 | 23000 | 0.0361 | - |
|
528 |
+
| 0.0955 | 24000 | 0.0358 | - |
|
529 |
+
| 0.0994 | 25000 | 0.0348 | - |
|
530 |
+
| 0.1034 | 26000 | 0.032 | - |
|
531 |
+
| 0.1074 | 27000 | 0.0341 | - |
|
532 |
+
| 0.1114 | 28000 | 0.0339 | - |
|
533 |
+
| 0.1154 | 29000 | 0.0325 | - |
|
534 |
+
| 0.1193 | 30000 | 0.0331 | - |
|
535 |
+
| 0.1233 | 31000 | 0.0306 | - |
|
536 |
+
| 0.1273 | 32000 | 0.0302 | - |
|
537 |
+
| 0.1313 | 33000 | 0.0304 | - |
|
538 |
+
| 0.1353 | 34000 | 0.0304 | - |
|
539 |
+
| 0.1392 | 35000 | 0.0306 | - |
|
540 |
+
| 0.1432 | 36000 | 0.0291 | - |
|
541 |
+
| 0.1472 | 37000 | 0.0273 | - |
|
542 |
+
| 0.1512 | 38000 | 0.0284 | - |
|
543 |
+
| 0.1551 | 39000 | 0.0292 | - |
|
544 |
+
| 0.1591 | 40000 | 0.0287 | - |
|
545 |
+
| 0.1631 | 41000 | 0.0277 | - |
|
546 |
+
| 0.1671 | 42000 | 0.0283 | - |
|
547 |
+
| 0.1711 | 43000 | 0.0268 | - |
|
548 |
+
| 0.1750 | 44000 | 0.027 | - |
|
549 |
+
| 0.1790 | 45000 | 0.0268 | - |
|
550 |
+
| 0.1830 | 46000 | 0.0259 | - |
|
551 |
+
| 0.1870 | 47000 | 0.0257 | - |
|
552 |
+
| 0.1909 | 48000 | 0.0252 | - |
|
553 |
+
| 0.1949 | 49000 | 0.0257 | - |
|
554 |
+
| 0.1989 | 50000 | 0.026 | - |
|
555 |
+
| 0.2029 | 51000 | 0.0262 | - |
|
556 |
+
| 0.2069 | 52000 | 0.0253 | - |
|
557 |
+
| 0.2108 | 53000 | 0.0252 | - |
|
558 |
+
| 0.2148 | 54000 | 0.025 | - |
|
559 |
+
| 0.2188 | 55000 | 0.0234 | - |
|
560 |
+
| 0.2228 | 56000 | 0.0233 | - |
|
561 |
+
| 0.2267 | 57000 | 0.0239 | - |
|
562 |
+
| 0.2307 | 58000 | 0.023 | - |
|
563 |
+
| 0.2347 | 59000 | 0.0246 | - |
|
564 |
+
| 0.2387 | 60000 | 0.0232 | - |
|
565 |
+
| 0.2427 | 61000 | 0.0244 | - |
|
566 |
+
| 0.2466 | 62000 | 0.0238 | - |
|
567 |
+
| 0.2506 | 63000 | 0.0231 | - |
|
568 |
+
| 0.2546 | 64000 | 0.0231 | - |
|
569 |
+
| 0.2586 | 65000 | 0.0226 | - |
|
570 |
+
| 0.2625 | 66000 | 0.0233 | - |
|
571 |
+
| 0.2665 | 67000 | 0.022 | - |
|
572 |
+
| 0.2705 | 68000 | 0.0222 | - |
|
573 |
+
| 0.2745 | 69000 | 0.0227 | - |
|
574 |
+
| 0.2785 | 70000 | 0.0232 | - |
|
575 |
+
| 0.2824 | 71000 | 0.0221 | - |
|
576 |
+
| 0.2864 | 72000 | 0.0223 | - |
|
577 |
+
| 0.2904 | 73000 | 0.0224 | - |
|
578 |
+
| 0.2944 | 74000 | 0.0218 | - |
|
579 |
+
| 0.2983 | 75000 | 0.0216 | - |
|
580 |
+
| 0.3023 | 76000 | 0.0213 | - |
|
581 |
+
| 0.3063 | 77000 | 0.0206 | - |
|
582 |
+
| 0.3103 | 78000 | 0.0214 | - |
|
583 |
+
| 0.3143 | 79000 | 0.0215 | - |
|
584 |
+
| 0.3182 | 80000 | 0.022 | - |
|
585 |
+
| 0.3222 | 81000 | 0.0209 | - |
|
586 |
+
| 0.3262 | 82000 | 0.0211 | - |
|
587 |
+
| 0.3302 | 83000 | 0.0215 | - |
|
588 |
+
| 0.3342 | 84000 | 0.0205 | - |
|
589 |
+
| 0.3381 | 85000 | 0.0201 | - |
|
590 |
+
| 0.3421 | 86000 | 0.0198 | - |
|
591 |
+
| 0.3461 | 87000 | 0.0208 | - |
|
592 |
+
| 0.3501 | 88000 | 0.0206 | - |
|
593 |
+
| 0.3540 | 89000 | 0.0193 | - |
|
594 |
+
| 0.3580 | 90000 | 0.0217 | - |
|
595 |
+
| 0.3620 | 91000 | 0.0197 | - |
|
596 |
+
| 0.3660 | 92000 | 0.0206 | - |
|
597 |
+
| 0.3700 | 93000 | 0.0193 | - |
|
598 |
+
| 0.3739 | 94000 | 0.019 | - |
|
599 |
+
| 0.3779 | 95000 | 0.0197 | - |
|
600 |
+
| 0.3819 | 96000 | 0.02 | - |
|
601 |
+
| 0.3859 | 97000 | 0.0176 | - |
|
602 |
+
| 0.3898 | 98000 | 0.0198 | - |
|
603 |
+
| 0.3938 | 99000 | 0.0186 | - |
|
604 |
+
| 0.3978 | 100000 | 0.0191 | - |
|
605 |
+
| 0.4018 | 101000 | 0.0187 | - |
|
606 |
+
| 0.4058 | 102000 | 0.0192 | - |
|
607 |
+
| 0.4097 | 103000 | 0.0183 | - |
|
608 |
+
| 0.4137 | 104000 | 0.0192 | - |
|
609 |
+
| 0.4177 | 105000 | 0.019 | - |
|
610 |
+
| 0.4217 | 106000 | 0.0179 | - |
|
611 |
+
| 0.4256 | 107000 | 0.0195 | - |
|
612 |
+
| 0.4296 | 108000 | 0.0183 | - |
|
613 |
+
| 0.4336 | 109000 | 0.018 | - |
|
614 |
+
| 0.4376 | 110000 | 0.0187 | - |
|
615 |
+
| 0.4416 | 111000 | 0.0178 | - |
|
616 |
+
| 0.4455 | 112000 | 0.0178 | - |
|
617 |
+
| 0.4495 | 113000 | 0.0181 | - |
|
618 |
+
| 0.4535 | 114000 | 0.0176 | - |
|
619 |
+
| 0.4575 | 115000 | 0.0189 | - |
|
620 |
+
| 0.4614 | 116000 | 0.0181 | - |
|
621 |
+
| 0.4654 | 117000 | 0.0185 | - |
|
622 |
+
| 0.4694 | 118000 | 0.0178 | - |
|
623 |
+
| 0.4734 | 119000 | 0.0183 | - |
|
624 |
+
| 0.4774 | 120000 | 0.0171 | - |
|
625 |
+
| 0.4813 | 121000 | 0.0164 | - |
|
626 |
+
| 0.4853 | 122000 | 0.0177 | - |
|
627 |
+
| 0.4893 | 123000 | 0.0184 | - |
|
628 |
+
| 0.4933 | 124000 | 0.0169 | - |
|
629 |
+
| 0.4972 | 125000 | 0.017 | - |
|
630 |
+
| 0.5012 | 126000 | 0.0174 | - |
|
631 |
+
| 0.5052 | 127000 | 0.0175 | - |
|
632 |
+
| 0.5092 | 128000 | 0.0167 | - |
|
633 |
+
| 0.5132 | 129000 | 0.0178 | - |
|
634 |
+
| 0.5171 | 130000 | 0.018 | - |
|
635 |
+
| 0.5211 | 131000 | 0.0175 | - |
|
636 |
+
| 0.5251 | 132000 | 0.0174 | - |
|
637 |
+
| 0.5291 | 133000 | 0.0176 | - |
|
638 |
+
| 0.5331 | 134000 | 0.0179 | - |
|
639 |
+
| 0.5370 | 135000 | 0.0171 | - |
|
640 |
+
| 0.5410 | 136000 | 0.0175 | - |
|
641 |
+
| 0.5450 | 137000 | 0.0175 | - |
|
642 |
+
| 0.5490 | 138000 | 0.0166 | - |
|
643 |
+
| 0.5529 | 139000 | 0.0168 | - |
|
644 |
+
| 0.5569 | 140000 | 0.0164 | - |
|
645 |
+
| 0.5609 | 141000 | 0.0163 | - |
|
646 |
+
| 0.5649 | 142000 | 0.0161 | - |
|
647 |
+
| 0.5689 | 143000 | 0.0169 | - |
|
648 |
+
| 0.5728 | 144000 | 0.0162 | - |
|
649 |
+
| 0.5768 | 145000 | 0.0171 | - |
|
650 |
+
| 0.5808 | 146000 | 0.0163 | - |
|
651 |
+
| 0.5848 | 147000 | 0.0163 | - |
|
652 |
+
| 0.5887 | 148000 | 0.0163 | - |
|
653 |
+
| 0.5927 | 149000 | 0.0164 | - |
|
654 |
+
| 0.5967 | 150000 | 0.0159 | - |
|
655 |
+
| 0.6007 | 151000 | 0.0164 | - |
|
656 |
+
| 0.6047 | 152000 | 0.0167 | - |
|
657 |
+
| 0.6086 | 153000 | 0.0167 | - |
|
658 |
+
| 0.6126 | 154000 | 0.0166 | - |
|
659 |
+
| 0.6166 | 155000 | 0.0157 | - |
|
660 |
+
| 0.6206 | 156000 | 0.0162 | - |
|
661 |
+
| 0.6245 | 157000 | 0.0164 | - |
|
662 |
+
| 0.6285 | 158000 | 0.0164 | - |
|
663 |
+
| 0.6325 | 159000 | 0.016 | - |
|
664 |
+
| 0.6365 | 160000 | 0.0162 | - |
|
665 |
+
| 0.6405 | 161000 | 0.0154 | - |
|
666 |
+
| 0.6444 | 162000 | 0.015 | - |
|
667 |
+
| 0.6484 | 163000 | 0.0158 | - |
|
668 |
+
| 0.6524 | 164000 | 0.0157 | - |
|
669 |
+
| 0.6564 | 165000 | 0.0165 | - |
|
670 |
+
| 0.6603 | 166000 | 0.0149 | - |
|
671 |
+
| 0.6643 | 167000 | 0.0159 | - |
|
672 |
+
| 0.6683 | 168000 | 0.0154 | - |
|
673 |
+
| 0.6723 | 169000 | 0.0156 | - |
|
674 |
+
| 0.6763 | 170000 | 0.0153 | - |
|
675 |
+
| 0.6802 | 171000 | 0.0155 | - |
|
676 |
+
| 0.6842 | 172000 | 0.0158 | - |
|
677 |
+
| 0.6882 | 173000 | 0.0144 | - |
|
678 |
+
| 0.6922 | 174000 | 0.0154 | - |
|
679 |
+
| 0.6961 | 175000 | 0.0153 | - |
|
680 |
+
| 0.7001 | 176000 | 0.0149 | - |
|
681 |
+
| 0.7041 | 177000 | 0.0152 | - |
|
682 |
+
| 0.7081 | 178000 | 0.0157 | - |
|
683 |
+
| 0.7121 | 179000 | 0.0148 | - |
|
684 |
+
| 0.7160 | 180000 | 0.0146 | - |
|
685 |
+
| 0.7200 | 181000 | 0.0152 | - |
|
686 |
+
| 0.7240 | 182000 | 0.0151 | - |
|
687 |
+
| 0.7280 | 183000 | 0.0159 | - |
|
688 |
+
| 0.7320 | 184000 | 0.0147 | - |
|
689 |
+
| 0.7359 | 185000 | 0.0139 | - |
|
690 |
+
| 0.7399 | 186000 | 0.0149 | - |
|
691 |
+
| 0.7439 | 187000 | 0.0143 | - |
|
692 |
+
| 0.7479 | 188000 | 0.0145 | - |
|
693 |
+
| 0.7518 | 189000 | 0.0154 | - |
|
694 |
+
| 0.7558 | 190000 | 0.0151 | - |
|
695 |
+
| 0.7598 | 191000 | 0.0155 | - |
|
696 |
+
| 0.7638 | 192000 | 0.016 | - |
|
697 |
+
| 0.7678 | 193000 | 0.0139 | - |
|
698 |
+
| 0.7717 | 194000 | 0.0154 | - |
|
699 |
+
| 0.7757 | 195000 | 0.0138 | - |
|
700 |
+
| 0.7797 | 196000 | 0.0147 | - |
|
701 |
+
| 0.7837 | 197000 | 0.0152 | - |
|
702 |
+
| 0.7876 | 198000 | 0.0141 | - |
|
703 |
+
| 0.7916 | 199000 | 0.0142 | - |
|
704 |
+
| 0.7956 | 200000 | 0.0149 | - |
|
705 |
+
| 0.7996 | 201000 | 0.0142 | - |
|
706 |
+
| 0.8036 | 202000 | 0.015 | - |
|
707 |
+
| 0.8075 | 203000 | 0.0142 | - |
|
708 |
+
| 0.8115 | 204000 | 0.0152 | - |
|
709 |
+
| 0.8155 | 205000 | 0.0142 | - |
|
710 |
+
| 0.8195 | 206000 | 0.0141 | - |
|
711 |
+
| 0.8234 | 207000 | 0.0146 | - |
|
712 |
+
| 0.8274 | 208000 | 0.014 | - |
|
713 |
+
| 0.8314 | 209000 | 0.0146 | - |
|
714 |
+
| 0.8354 | 210000 | 0.0138 | - |
|
715 |
+
| 0.8394 | 211000 | 0.0141 | - |
|
716 |
+
| 0.8433 | 212000 | 0.0143 | - |
|
717 |
+
| 0.8473 | 213000 | 0.0139 | - |
|
718 |
+
| 0.8513 | 214000 | 0.0138 | - |
|
719 |
+
| 0.8553 | 215000 | 0.0146 | - |
|
720 |
+
| 0.8592 | 216000 | 0.014 | - |
|
721 |
+
| 0.8632 | 217000 | 0.0138 | - |
|
722 |
+
| 0.8672 | 218000 | 0.0143 | - |
|
723 |
+
| 0.8712 | 219000 | 0.0151 | - |
|
724 |
+
| 0.8752 | 220000 | 0.0146 | - |
|
725 |
+
| 0.8791 | 221000 | 0.0135 | - |
|
726 |
+
| 0.8831 | 222000 | 0.0136 | - |
|
727 |
+
| 0.8871 | 223000 | 0.0139 | - |
|
728 |
+
| 0.8911 | 224000 | 0.0136 | - |
|
729 |
+
| 0.8950 | 225000 | 0.0142 | - |
|
730 |
+
| 0.8990 | 226000 | 0.0134 | - |
|
731 |
+
| 0.9030 | 227000 | 0.0143 | - |
|
732 |
+
| 0.9070 | 228000 | 0.0142 | - |
|
733 |
+
| 0.9110 | 229000 | 0.0142 | - |
|
734 |
+
| 0.9149 | 230000 | 0.0138 | - |
|
735 |
+
| 0.9189 | 231000 | 0.0136 | - |
|
736 |
+
| 0.9229 | 232000 | 0.0138 | - |
|
737 |
+
| 0.9269 | 233000 | 0.0144 | - |
|
738 |
+
| 0.9309 | 234000 | 0.0137 | - |
|
739 |
+
| 0.9348 | 235000 | 0.0135 | - |
|
740 |
+
| 0.9388 | 236000 | 0.014 | - |
|
741 |
+
| 0.9428 | 237000 | 0.014 | - |
|
742 |
+
| 0.9468 | 238000 | 0.0136 | - |
|
743 |
+
| 0.9507 | 239000 | 0.0134 | - |
|
744 |
+
| 0.9547 | 240000 | 0.0144 | - |
|
745 |
+
| 0.9587 | 241000 | 0.0136 | - |
|
746 |
+
| 0.9627 | 242000 | 0.014 | - |
|
747 |
+
| 0.9667 | 243000 | 0.0138 | - |
|
748 |
+
| 0.9706 | 244000 | 0.0133 | - |
|
749 |
+
| 0.9746 | 245000 | 0.0142 | - |
|
750 |
+
| 0.9786 | 246000 | 0.0135 | - |
|
751 |
+
| 0.9826 | 247000 | 0.013 | - |
|
752 |
+
| 0.9865 | 248000 | 0.0138 | - |
|
753 |
+
| 0.9905 | 249000 | 0.0146 | - |
|
754 |
+
| 0.9945 | 250000 | 0.0142 | - |
|
755 |
+
| 0.9985 | 251000 | 0.0134 | - |
|
756 |
+
| 1.0000 | 251382 | - | 0.0013 |
|
757 |
+
|
758 |
+
</details>
|
759 |
+
|
760 |
+
### Framework Versions
|
761 |
+
- Python: 3.11.11
|
762 |
+
- Sentence Transformers: 3.4.1
|
763 |
+
- Transformers: 4.48.2
|
764 |
+
- PyTorch: 2.6.0+cu124
|
765 |
+
- Accelerate: 1.3.0
|
766 |
+
- Datasets: 3.2.0
|
767 |
+
- Tokenizers: 0.21.0
|
768 |
+
|
769 |
+
## Citation
|
770 |
+
|
771 |
+
### BibTeX
|
772 |
+
|
773 |
+
#### Sentence Transformers
|
774 |
+
```bibtex
|
775 |
+
@inproceedings{reimers-2019-sentence-bert,
|
776 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
777 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
778 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
779 |
+
month = "11",
|
780 |
+
year = "2019",
|
781 |
+
publisher = "Association for Computational Linguistics",
|
782 |
+
url = "https://arxiv.org/abs/1908.10084",
|
783 |
+
}
|
784 |
+
```
|
785 |
+
|
786 |
+
#### MultipleNegativesRankingLoss
|
787 |
+
```bibtex
|
788 |
+
@misc{henderson2017efficient,
|
789 |
+
title={Efficient Natural Language Response Suggestion for Smart Reply},
|
790 |
+
author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
|
791 |
+
year={2017},
|
792 |
+
eprint={1705.00652},
|
793 |
+
archivePrefix={arXiv},
|
794 |
+
primaryClass={cs.CL}
|
795 |
+
}
|
796 |
+
```
|
797 |
+
|
798 |
+
<!--
|
799 |
+
## Glossary
|
800 |
+
|
801 |
+
*Clearly define terms in order to be accessible across audiences.*
|
802 |
+
-->
|
803 |
+
|
804 |
+
<!--
|
805 |
+
## Model Card Authors
|
806 |
+
|
807 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
808 |
+
-->
|
809 |
+
|
810 |
+
<!--
|
811 |
+
## Model Card Contact
|
812 |
+
|
813 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
814 |
+
-->
|
config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/data/aronow/pankaj/Embeddings/scripts/data/Bioformer-8L-UMLS-Pubmed_PMC-Random_TCE/Bioformer-8L-UMLS-Pubmed_PMC-Random_TCE-Epoch-1",
|
3 |
+
"architectures": [
|
4 |
+
"BertModel"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"classifier_dropout": null,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 512,
|
11 |
+
"initializer_range": 0.02,
|
12 |
+
"intermediate_size": 2048,
|
13 |
+
"layer_norm_eps": 1e-12,
|
14 |
+
"max_position_embeddings": 512,
|
15 |
+
"model_type": "bert",
|
16 |
+
"num_attention_heads": 8,
|
17 |
+
"num_hidden_layers": 8,
|
18 |
+
"pad_token_id": 0,
|
19 |
+
"position_embedding_type": "absolute",
|
20 |
+
"torch_dtype": "float32",
|
21 |
+
"transformers_version": "4.45.2",
|
22 |
+
"type_vocab_size": 2,
|
23 |
+
"use_cache": true,
|
24 |
+
"vocab_size": 32768
|
25 |
+
}
|
config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "3.1.1",
|
4 |
+
"transformers": "4.45.2",
|
5 |
+
"pytorch": "2.4.1+cu121"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa8f88fca0be4b734111a158ee715ad631953f70e1a7e84ea3bd5467ca742e7b
|
3 |
+
size 170107592
|
modules.json
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
}
|
14 |
+
]
|
sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 512,
|
3 |
+
"do_lower_case": false
|
4 |
+
}
|
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": false,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"max_length": 512,
|
51 |
+
"model_max_length": 512,
|
52 |
+
"never_split": null,
|
53 |
+
"pad_to_multiple_of": null,
|
54 |
+
"pad_token": "[PAD]",
|
55 |
+
"pad_token_type_id": 0,
|
56 |
+
"padding_side": "right",
|
57 |
+
"sep_token": "[SEP]",
|
58 |
+
"stride": 0,
|
59 |
+
"strip_accents": null,
|
60 |
+
"tokenize_chinese_chars": true,
|
61 |
+
"tokenizer_class": "BertTokenizer",
|
62 |
+
"truncation_side": "right",
|
63 |
+
"truncation_strategy": "longest_first",
|
64 |
+
"unk_token": "[UNK]"
|
65 |
+
}
|
vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|