ireneisdoomed committed on
Commit
12a25ea
·
verified ·
1 Parent(s): ee51c2a

chore: update model based on 24.11_freeze10 run with intervals

Browse files
Files changed (4) hide show
  1. .gitattributes +1 -0
  2. README.md +217 -0
  3. classifier.skops +3 -0
  4. config.json +226 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ classifier.skops filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: sklearn
3
+ tags:
4
+ - sklearn
5
+ - skops
6
+ - tabular-classification
7
+ model_format: skops
8
+ model_file: classifier.skops
9
+ widget:
10
+ - structuredData:
11
+ credibleSetConfidence:
12
+ - 0.75
13
+ - 0.75
14
+ - 0.75
15
+ dhsPmtrCorrelationMean:
16
+ - 0.0
17
+ - 0.0
18
+ - 0.0
19
+ dhsPmtrCorrelationMeanNeighbourhood:
20
+ - 0.0
21
+ - 0.0
22
+ - 0.0
23
+ distanceFootprintMean:
24
+ - 0.8487144112586975
25
+ - 0.9365111589431763
26
+ - 0.9975032806396484
27
+ distanceFootprintMeanNeighbourhood:
28
+ - 0.8487144112586975
29
+ - 0.9365111589431763
30
+ - 0.9975032806396484
31
+ distanceSentinelFootprint:
32
+ - 0.8487144112586975
33
+ - 0.9365111589431763
34
+ - 0.9975032806396484
35
+ distanceSentinelFootprintNeighbourhood:
36
+ - 0.8487144112586975
37
+ - 0.9365111589431763
38
+ - 0.9975032806396484
39
+ distanceSentinelTss:
40
+ - 0.8487144112586975
41
+ - 0.9257694482803345
42
+ - 0.9975032806396484
43
+ distanceSentinelTssNeighbourhood:
44
+ - 0.850220799446106
45
+ - 0.9274126291275024
46
+ - 0.9992737770080566
47
+ distanceTssMean:
48
+ - 0.8487144112586975
49
+ - 0.9257694482803345
50
+ - 0.9975032806396484
51
+ distanceTssMeanNeighbourhood:
52
+ - 0.850220799446106
53
+ - 0.9274126291275024
54
+ - 0.9992737770080566
55
+ eQtlColocClppMaximum:
56
+ - 0.0
57
+ - 0.0
58
+ - 0.0
59
+ eQtlColocClppMaximumNeighbourhood:
60
+ - 0.0
61
+ - 0.0
62
+ - 0.0
63
+ eQtlColocH4Maximum:
64
+ - 0.0
65
+ - 0.0
66
+ - 0.0
67
+ eQtlColocH4MaximumNeighbourhood:
68
+ - 0.0
69
+ - 0.0
70
+ - 0.0
71
+ enhTssCorrelationMean:
72
+ - 0.0
73
+ - 0.0
74
+ - 0.0
75
+ enhTssCorrelationMeanNeighbourhood:
76
+ - 0.0
77
+ - 0.0
78
+ - 0.0
79
+ geneCount500kb:
80
+ - 20.0
81
+ - 20.0
82
+ - 20.0
83
+ pQtlColocClppMaximum:
84
+ - 0.0
85
+ - 0.0
86
+ - 0.0
87
+ pQtlColocClppMaximumNeighbourhood:
88
+ - 0.0
89
+ - 0.0
90
+ - 0.0
91
+ pQtlColocH4Maximum:
92
+ - 0.0
93
+ - 0.0
94
+ - 0.0
95
+ pQtlColocH4MaximumNeighbourhood:
96
+ - 0.0
97
+ - 0.0
98
+ - 0.0
99
+ pchicMean:
100
+ - 0.0
101
+ - 0.0
102
+ - 0.0
103
+ pchicMeanNeighbourhood:
104
+ - 0.0
105
+ - 0.0
106
+ - 0.0
107
+ proteinGeneCount500kb:
108
+ - 8.0
109
+ - 8.0
110
+ - 8.0
111
+ sQtlColocClppMaximum:
112
+ - 0.0
113
+ - 0.0
114
+ - 0.0
115
+ sQtlColocClppMaximumNeighbourhood:
116
+ - 0.0
117
+ - 0.0
118
+ - 0.0
119
+ sQtlColocH4Maximum:
120
+ - 0.0
121
+ - 0.0
122
+ - 0.0
123
+ sQtlColocH4MaximumNeighbourhood:
124
+ - 0.0
125
+ - 0.0
126
+ - 0.0
127
+ studyLocusId:
128
+ - 005bc8624f8dd7f7c7bc63e651e9e59d
129
+ - 005bc8624f8dd7f7c7bc63e651e9e59d
130
+ - 005bc8624f8dd7f7c7bc63e651e9e59d
131
+ traitFromSourceMappedId:
132
+ - EFO_0004612
133
+ - EFO_0004612
134
+ - EFO_0004612
135
+ vepMaximum:
136
+ - 0.0
137
+ - 0.0
138
+ - 0.0
139
+ vepMaximumNeighbourhood:
140
+ - 0.0
141
+ - 0.0
142
+ - 0.0
143
+ vepMean:
144
+ - 0.0
145
+ - 0.0
146
+ - 0.0
147
+ vepMeanNeighbourhood:
148
+ - 0.0
149
+ - 0.0
150
+ - 0.0
151
+ ---
152
+
153
+ # Model description
154
+
155
+ The locus-to-gene (L2G) model prioritises likely causal genes at each GWAS locus using features derived from genetic and functional genomics data. The main categories of predictive features are:
156
+
157
+ - Distance (from credible set variants to gene)
158
+ - Molecular QTL Colocalization
159
+ - Chromatin Interaction (e.g., promoter-capture Hi-C)
160
+ - Variant Pathogenicity (from VEP)
161
+
162
+ More information at: https://opentargets.github.io/gentropy/python_api/methods/l2g/_l2g/
163
+
164
+
165
+ ## Intended uses & limitations
166
+
167
+ [More Information Needed]
168
+
169
+ ## Training Procedure
170
+
171
+ Gradient Boosting Classifier
172
+
173
+ ### Hyperparameters
174
+
175
+ <details>
176
+ <summary> Click to expand </summary>
177
+
178
+ | Hyperparameter | Value |
179
+ |--------------------------|--------------|
180
+ | ccp_alpha | 0 |
181
+ | criterion | friedman_mse |
182
+ | init | |
183
+ | learning_rate | 0.1 |
184
+ | loss | log_loss |
185
+ | max_depth | 5 |
186
+ | max_features | |
187
+ | max_leaf_nodes | |
188
+ | min_impurity_decrease | 0.0 |
189
+ | min_samples_leaf | 5 |
190
+ | min_samples_split | 5 |
191
+ | min_weight_fraction_leaf | 0.0 |
192
+ | n_estimators | 100 |
193
+ | n_iter_no_change | |
194
+ | random_state | 42 |
195
+ | subsample | 1 |
196
+ | tol | 0.0001 |
197
+ | validation_fraction | 0.1 |
198
+ | verbose | 0 |
199
+ | warm_start | False |
200
+
201
+ </details>
202
+
203
+ # How to Get Started with the Model
204
+
205
+ To use the model, you can load it using the `LocusToGeneModel.load_from_hub` method. This will return a `LocusToGeneModel` object that can be used to make predictions on a feature matrix.
206
+ Predictions are then generated by calling the object's `predict` method on that feature matrix.
207
+
208
+ More information can be found at: https://opentargets.github.io/gentropy/python_api/methods/l2g/model/
209
+
210
+
211
+ # Citation
212
+
213
+ https://doi.org/10.1038/s41588-021-00945-5
214
+
215
+ # License
216
+
217
+ MIT
classifier.skops ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aea6a19c5cfdf6299286c8324b31367430639a2a1e7b7fe12427d5f416c57473
3
+ size 2888608
config.json ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "sklearn": {
3
+ "columns": [
4
+ "studyLocusId",
5
+ "traitFromSourceMappedId",
6
+ "eQtlColocClppMaximum",
7
+ "pQtlColocClppMaximum",
8
+ "sQtlColocClppMaximum",
9
+ "eQtlColocH4Maximum",
10
+ "pQtlColocH4Maximum",
11
+ "sQtlColocH4Maximum",
12
+ "eQtlColocClppMaximumNeighbourhood",
13
+ "pQtlColocClppMaximumNeighbourhood",
14
+ "sQtlColocClppMaximumNeighbourhood",
15
+ "eQtlColocH4MaximumNeighbourhood",
16
+ "pQtlColocH4MaximumNeighbourhood",
17
+ "sQtlColocH4MaximumNeighbourhood",
18
+ "distanceSentinelFootprint",
19
+ "distanceSentinelFootprintNeighbourhood",
20
+ "distanceFootprintMean",
21
+ "distanceFootprintMeanNeighbourhood",
22
+ "distanceTssMean",
23
+ "distanceTssMeanNeighbourhood",
24
+ "distanceSentinelTss",
25
+ "distanceSentinelTssNeighbourhood",
26
+ "vepMaximum",
27
+ "vepMaximumNeighbourhood",
28
+ "vepMean",
29
+ "vepMeanNeighbourhood",
30
+ "pchicMean",
31
+ "pchicMeanNeighbourhood",
32
+ "enhTssCorrelationMean",
33
+ "enhTssCorrelationMeanNeighbourhood",
34
+ "dhsPmtrCorrelationMean",
35
+ "dhsPmtrCorrelationMeanNeighbourhood",
36
+ "geneCount500kb",
37
+ "proteinGeneCount500kb",
38
+ "credibleSetConfidence"
39
+ ],
40
+ "environment": [
41
+ "scikit-learn=1.6.1"
42
+ ],
43
+ "example_input": {
44
+ "credibleSetConfidence": [
45
+ 0.75,
46
+ 0.75,
47
+ 0.75
48
+ ],
49
+ "dhsPmtrCorrelationMean": [
50
+ 0.0,
51
+ 0.0,
52
+ 0.0
53
+ ],
54
+ "dhsPmtrCorrelationMeanNeighbourhood": [
55
+ 0.0,
56
+ 0.0,
57
+ 0.0
58
+ ],
59
+ "distanceFootprintMean": [
60
+ 0.8487144112586975,
61
+ 0.9365111589431763,
62
+ 0.9975032806396484
63
+ ],
64
+ "distanceFootprintMeanNeighbourhood": [
65
+ 0.8487144112586975,
66
+ 0.9365111589431763,
67
+ 0.9975032806396484
68
+ ],
69
+ "distanceSentinelFootprint": [
70
+ 0.8487144112586975,
71
+ 0.9365111589431763,
72
+ 0.9975032806396484
73
+ ],
74
+ "distanceSentinelFootprintNeighbourhood": [
75
+ 0.8487144112586975,
76
+ 0.9365111589431763,
77
+ 0.9975032806396484
78
+ ],
79
+ "distanceSentinelTss": [
80
+ 0.8487144112586975,
81
+ 0.9257694482803345,
82
+ 0.9975032806396484
83
+ ],
84
+ "distanceSentinelTssNeighbourhood": [
85
+ 0.850220799446106,
86
+ 0.9274126291275024,
87
+ 0.9992737770080566
88
+ ],
89
+ "distanceTssMean": [
90
+ 0.8487144112586975,
91
+ 0.9257694482803345,
92
+ 0.9975032806396484
93
+ ],
94
+ "distanceTssMeanNeighbourhood": [
95
+ 0.850220799446106,
96
+ 0.9274126291275024,
97
+ 0.9992737770080566
98
+ ],
99
+ "eQtlColocClppMaximum": [
100
+ 0.0,
101
+ 0.0,
102
+ 0.0
103
+ ],
104
+ "eQtlColocClppMaximumNeighbourhood": [
105
+ 0.0,
106
+ 0.0,
107
+ 0.0
108
+ ],
109
+ "eQtlColocH4Maximum": [
110
+ 0.0,
111
+ 0.0,
112
+ 0.0
113
+ ],
114
+ "eQtlColocH4MaximumNeighbourhood": [
115
+ 0.0,
116
+ 0.0,
117
+ 0.0
118
+ ],
119
+ "enhTssCorrelationMean": [
120
+ 0.0,
121
+ 0.0,
122
+ 0.0
123
+ ],
124
+ "enhTssCorrelationMeanNeighbourhood": [
125
+ 0.0,
126
+ 0.0,
127
+ 0.0
128
+ ],
129
+ "geneCount500kb": [
130
+ 20.0,
131
+ 20.0,
132
+ 20.0
133
+ ],
134
+ "pQtlColocClppMaximum": [
135
+ 0.0,
136
+ 0.0,
137
+ 0.0
138
+ ],
139
+ "pQtlColocClppMaximumNeighbourhood": [
140
+ 0.0,
141
+ 0.0,
142
+ 0.0
143
+ ],
144
+ "pQtlColocH4Maximum": [
145
+ 0.0,
146
+ 0.0,
147
+ 0.0
148
+ ],
149
+ "pQtlColocH4MaximumNeighbourhood": [
150
+ 0.0,
151
+ 0.0,
152
+ 0.0
153
+ ],
154
+ "pchicMean": [
155
+ 0.0,
156
+ 0.0,
157
+ 0.0
158
+ ],
159
+ "pchicMeanNeighbourhood": [
160
+ 0.0,
161
+ 0.0,
162
+ 0.0
163
+ ],
164
+ "proteinGeneCount500kb": [
165
+ 8.0,
166
+ 8.0,
167
+ 8.0
168
+ ],
169
+ "sQtlColocClppMaximum": [
170
+ 0.0,
171
+ 0.0,
172
+ 0.0
173
+ ],
174
+ "sQtlColocClppMaximumNeighbourhood": [
175
+ 0.0,
176
+ 0.0,
177
+ 0.0
178
+ ],
179
+ "sQtlColocH4Maximum": [
180
+ 0.0,
181
+ 0.0,
182
+ 0.0
183
+ ],
184
+ "sQtlColocH4MaximumNeighbourhood": [
185
+ 0.0,
186
+ 0.0,
187
+ 0.0
188
+ ],
189
+ "studyLocusId": [
190
+ "005bc8624f8dd7f7c7bc63e651e9e59d",
191
+ "005bc8624f8dd7f7c7bc63e651e9e59d",
192
+ "005bc8624f8dd7f7c7bc63e651e9e59d"
193
+ ],
194
+ "traitFromSourceMappedId": [
195
+ "EFO_0004612",
196
+ "EFO_0004612",
197
+ "EFO_0004612"
198
+ ],
199
+ "vepMaximum": [
200
+ 0.0,
201
+ 0.0,
202
+ 0.0
203
+ ],
204
+ "vepMaximumNeighbourhood": [
205
+ 0.0,
206
+ 0.0,
207
+ 0.0
208
+ ],
209
+ "vepMean": [
210
+ 0.0,
211
+ 0.0,
212
+ 0.0
213
+ ],
214
+ "vepMeanNeighbourhood": [
215
+ 0.0,
216
+ 0.0,
217
+ 0.0
218
+ ]
219
+ },
220
+ "model": {
221
+ "file": "classifier.skops"
222
+ },
223
+ "model_format": "skops",
224
+ "task": "tabular-classification"
225
+ }
226
+ }