GitHub Action
committed on
Commit
·
d28bf1d
1
Parent(s):
3bf1ffc
commit from github
Browse files- README.md +82 -12
- model.onnx +2 -2
- model.pkl +2 -2
README.md
CHANGED
@@ -58,9 +58,18 @@ inference: false
|
|
58 |
pipeline_tag: tabular-classification
|
59 |
---
|
60 |
|
61 |
-
# Model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
## Evaluation
|
64 |
|
65 |
| Metric | Value |
|
66 |
|-----------|----------|
|
@@ -69,22 +78,21 @@ pipeline_tag: tabular-classification
|
|
69 |
| precision | 0.951996 |
|
70 |
| recall | 0.938331 |
|
71 |
|
72 |
-
# Model
|
73 |
-
|
74 |
|
75 |
-
The model predicts the probability that a URL is a phishing site using a list of features.
|
76 |
|
77 |
-
|
78 |
-
|
79 |
-
- **Task:** Tabular classification (Binary)
|
80 |
-
- **License:** {{ license }}
|
81 |
-
- **Repository:** {{ repo }}
|
82 |
|
|
|
|
|
|
|
83 |
|
84 |
-
# How to Get Started with the Model
|
85 |
|
86 |
## With ONNX (recommended)
|
87 |
|
|
|
|
|
88 |
```python
|
89 |
import onnxruntime
|
90 |
import pandas as pd
|
@@ -129,9 +137,71 @@ for url, proba in zip(data, probas):
|
|
129 |
print(f"Likelihood of being a phishing site: {proba[1] * 100:.2f}%")
|
130 |
print("----")
|
131 |
|
132 |
-
#
|
133 |
# URL: https://www.rga.com/about/workplace
|
134 |
# Likelihood of being a phishing site: 0.89%
|
135 |
# ----
|
136 |
|
137 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
pipeline_tag: tabular-classification
|
59 |
---
|
60 |
|
61 |
+
# Model Description
|
62 |
+
|
63 |
+
|
64 |
+
The model predicts the probability that a URL is a phishing site using a list of features.
|
65 |
+
|
66 |
+
- **Model type:** Traditional machine learning
|
67 |
+
- **Task:** Tabular classification (Binary)
|
68 |
+
- **License:** MIT
|
69 |
+
- **Repository:** https://github.com/pirocheto/phishing-url-detection
|
70 |
+
|
71 |
|
72 |
+
## Evaluation
|
73 |
|
74 |
| Metric | Value |
|
75 |
|-----------|----------|
|
|
|
78 |
| precision | 0.951996 |
|
79 |
| recall | 0.938331 |
|
80 |
|
81 |
+
# How to Get Started with the Model
|
|
|
82 |
|
|
|
83 |
|
84 |
+
Using pickle in Python is discouraged due to security risks during data deserialization, potentially allowing code injection.
|
85 |
+
It lacks portability across Python versions and interoperability with other languages.
|
|
|
|
|
|
|
86 |
|
87 |
+
Instead, we recommend using the ONNX model, which is more secure.
|
88 |
+
It is half the size and almost twice as fast compared to the pickle model.
|
89 |
+
Additionally, it can be utilized by languages supported by the [ONNX runtime](https://onnxruntime.ai/docs/get-started/) (see below for an example using JavaScript).
|
90 |
|
|
|
91 |
|
92 |
## With ONNX (recommended)
|
93 |
|
94 |
+
### Python
|
95 |
+
|
96 |
```python
|
97 |
import onnxruntime
|
98 |
import pandas as pd
|
|
|
137 |
print(f"Likelihood of being a phishing site: {proba[1] * 100:.2f}%")
|
138 |
print("----")
|
139 |
|
140 |
+
# Expected output:
|
141 |
# URL: https://www.rga.com/about/workplace
|
142 |
# Likelihood of being a phishing site: 0.89%
|
143 |
# ----
|
144 |
|
145 |
```
|
146 |
+
|
147 |
+
### JavaScript
|
148 |
+
|
149 |
+
```javascript
|
150 |
+
const ort = require('onnxruntime-node');
|
151 |
+
|
152 |
+
const data = [
|
153 |
+
{
|
154 |
+
"url": "http://rapidpaws.com/wp-content/we_transfer/index2.php?email=/",
|
155 |
+
"nb_hyperlinks": 1,
|
156 |
+
"ratio_intHyperlinks": 1,
|
157 |
+
"ratio_extHyperlinks": 0,
|
158 |
+
"ratio_extRedirection": 0,
|
159 |
+
"safe_anchor": 0,
|
160 |
+
"domain_registration_length": 338,
|
161 |
+
"domain_age": 0,
|
162 |
+
"web_traffic":1853,
|
163 |
+
"google_index": 1,
|
164 |
+
"page_rank": 2,
|
165 |
+
},
|
166 |
+
];
|
167 |
+
|
168 |
+
async function main() {
|
169 |
+
try {
|
170 |
+
// Make sure you have downloaded the model.onnx
|
171 |
+
// Creating an ONNX inference session with the specified model
|
172 |
+
const model_path = "./models/model.onnx";
|
173 |
+
const session = await ort.InferenceSession.create(model_path);
|
174 |
+
|
175 |
+
// Creating an ONNX tensor from the input data
|
176 |
+
const inputs = data.map(url => Object.values(url).slice(1));
|
177 |
+
const flattenInputs = inputs.flat();
|
178 |
+
const tensor = new ort.Tensor('float32', flattenInputs, [inputs.length, 10]);
|
179 |
+
|
180 |
+
// Executing the inference session with the input tensor
|
181 |
+
const results = await session.run({"X": tensor});
|
182 |
+
const probas = results['probabilities'].data;
|
183 |
+
|
184 |
+
// Displaying results for each URL
|
185 |
+
data.forEach((url, index) => {
|
186 |
+
const proba = probas[index * 2 + 1];
|
187 |
+
const percent = (proba * 100).toFixed(2);
|
188 |
+
|
189 |
+
console.log(`URL: ${url.url}`);
|
190 |
+
console.log(`Likelihood of being a phishing site: ${percent}%`);
|
191 |
+
console.log("----");
|
192 |
+
});
|
193 |
+
|
194 |
+
|
195 |
+
} catch (e) {
|
196 |
+
console.log(`failed to inference ONNX model: ${e}.`);
|
197 |
+
}
|
198 |
+
};
|
199 |
+
|
200 |
+
main();
|
201 |
+
|
202 |
+
// Expected output:
|
203 |
+
// URL: https://www.rga.com/about/workplace
|
204 |
+
// Likelihood of being a phishing site: 0.89%
|
205 |
+
// ----
|
206 |
+
|
207 |
+
```
|
model.onnx
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4685fcb655e211a8e5c1acfeea93377b4e7005b6d5e8670e727d75b3a08b3d1
|
3 |
+
size 22232006
|
model.pkl
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57bd4b9a3920643dabf348f853ca7710803ac2654b3f91577cc2a38f581d6908
|
3 |
+
size 46071707
|