Update README.md
Browse files
README.md
CHANGED
@@ -13,25 +13,44 @@ import json
|
|
13 |
import requests
|
14 |
|
15 |
def convert_to_json(answer):
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
answer = answer.replace("<pad>", "").replace("</s>", "")
|
18 |
-
answer = answer.strip("'")
|
19 |
-
|
20 |
-
# Replace 'false' with 'False' and 'true' with 'True' for Python compatibility
|
21 |
answer = answer.replace("false", "False").replace("true", "True")
|
22 |
-
|
23 |
-
# Convert the string to a dictionary
|
24 |
answer_dict = eval(answer)
|
25 |
-
|
26 |
-
# Convert the dictionary to a JSON object
|
27 |
answer_json = json.dumps(answer_dict)
|
28 |
-
|
29 |
-
# Load the JSON object to ensure it's correctly formatted
|
30 |
json_data = json.loads(answer_json)
|
31 |
-
|
32 |
return json_data
|
33 |
|
34 |
def valid_url(url):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
valid_list = [
|
36 |
"github.com", "bitbucket.org", "sourceforge.net", "aws.amazon.com",
|
37 |
"dev.azure.com", "gitea.com", "gogs.io", "phabricator.com",
|
@@ -41,8 +60,10 @@ def valid_url(url):
|
|
41 |
|
42 |
if platform in valid_list:
|
43 |
return True
|
44 |
-
|
45 |
-
|
|
|
|
|
46 |
```
|
47 |
|
48 |
```python
|
@@ -50,8 +71,8 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
|
|
50 |
import torch
|
51 |
import time
|
52 |
|
53 |
-
tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/NL-JSON-
|
54 |
-
model = AutoModelForSeq2SeqLM.from_pretrained("AquilaX-AI/NL-JSON-
|
55 |
|
56 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
57 |
|
@@ -67,15 +88,20 @@ inputs = inputs.to(device)
|
|
67 |
outputs = model.generate(**inputs, max_length=256)
|
68 |
answer = tokenizer.decode(outputs[0])
|
69 |
json_data = convert_to_json(answer)
|
|
|
|
|
70 |
try:
|
71 |
valid = valid_url(json_data["repo"])
|
72 |
-
if valid
|
73 |
-
|
74 |
else:
|
75 |
-
|
|
|
|
|
76 |
except:
|
77 |
-
|
78 |
|
79 |
end = time.time()
|
|
|
80 |
print(f"Time taken: {end - start}")
|
81 |
```
|
|
|
13 |
import requests
|
14 |
|
15 |
def convert_to_json(answer):
    """
    Convert the model's decoded output string into JSON-compatible data.

    The string is cleaned of the ``<pad>`` and ``</s>`` special tokens and of
    surrounding whitespace and single quotes, lowercase ``true``/``false`` are
    rewritten as Python's ``True``/``False``, and the result is parsed as a
    Python literal. The parsed value is then round-tripped through ``json`` so
    that only JSON-representable data is returned.

    Parameters:
        answer (str): The decoded model output, expected to hold a dictionary literal.

    Returns:
        dict: The parsed data (more generally, whatever JSON-compatible value
        the literal in ``answer`` describes).

    Raises:
        ValueError, SyntaxError: If the cleaned string is not a valid Python literal.
        TypeError: If the parsed value cannot be serialized to JSON.
    """
    import ast  # local import so the snippet does not depend on the file's import block

    answer = answer.replace("<pad>", "").replace("</s>", "")
    # Removing the special tokens usually leaves stray spaces around the payload;
    # strip them on both sides of the quote strip so the literal starts cleanly.
    answer = answer.strip().strip("'").strip()
    # NOTE: this is a plain textual substitution, so it also rewrites "true"/"false"
    # occurring inside string values (e.g. within a URL).
    answer = answer.replace("false", "False").replace("true", "True")
    # SECURITY: the text comes from a model and must be treated as untrusted.
    # ast.literal_eval only accepts literals, whereas eval() would execute
    # arbitrary expressions embedded in the output.
    answer_dict = ast.literal_eval(answer)
    # Round-trip through JSON to guarantee the result is JSON-serializable.
    return json.loads(json.dumps(answer_dict))
|
36 |
|
37 |
def valid_url(url):
|
38 |
+
"""
|
39 |
+
Validate the given URL against a list of supported platforms.
|
40 |
+
|
41 |
+
This function checks if the provided URL belongs to one of the supported
|
42 |
+
platforms for scanning. If the URL is valid, it returns True. Otherwise,
|
43 |
+
it returns a message indicating that the URL is not supported and lists the
|
44 |
+
available scanners.
|
45 |
+
|
46 |
+
Parameters:
|
47 |
+
url (str): The URL to be validated.
|
48 |
+
|
49 |
+
Returns:
|
50 |
+
bool or dict: Returns True if the URL is valid, otherwise returns a
|
51 |
+
dictionary with a message indicating the URL is not supported
|
52 |
+
and lists the available scanners.
|
53 |
+
"""
|
54 |
valid_list = [
|
55 |
"github.com", "bitbucket.org", "sourceforge.net", "aws.amazon.com",
|
56 |
"dev.azure.com", "gitea.com", "gogs.io", "phabricator.com",
|
|
|
60 |
|
61 |
if platform in valid_list:
|
62 |
return True
|
63 |
+
|
64 |
+
return {
|
65 |
+
'message': 'Provide a valid URL for scanning. Currently, we support PII_Scanner, SAST_Scanner, Sac_Scanner (Open_Source_Security), IaC_Scanner, Container_Scanner'
|
66 |
+
}
|
67 |
```
|
68 |
|
69 |
```python
|
|
|
71 |
import torch
|
72 |
import time
|
73 |
|
74 |
+
# Load the tokenizer and the seq2seq model from the same checkpoint identifier.
tokenizer = AutoTokenizer.from_pretrained(
    "AquilaX-AI/NL-JSON-Start-Scan"
)
model = AutoModelForSeq2SeqLM.from_pretrained(
    "AquilaX-AI/NL-JSON-Start-Scan"
)

# Use the first CUDA device when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
|
78 |
|
|
|
88 |
# Generate a response for the prepared inputs, decode the first sequence and
# parse it into JSON-compatible data.
outputs = model.generate(**inputs, max_length=256)
answer = tokenizer.decode(outputs[0])
json_data = convert_to_json(answer)

# Start from a copy of the parsed payload; it is what gets printed when the
# validation step below cannot run.
to_return = json_data.copy()
try:
    valid = valid_url(json_data["repo"])
    # valid_url returns True for a supported platform and a message dict otherwise.
    if valid is not True:
        to_return = valid
    else:
        # Replace the model's repo value with the URL(s) found verbatim in the query.
        # NOTE(review): findall returns a list, so 'repo' becomes a list of URLs.
        # NOTE(review): assumes `re` is imported and `query` is defined earlier in
        # the script (not visible here) -- TODO confirm.
        url = re.findall(r'https?://\S+', query)
        to_return['repo'] = url
except Exception:
    # Best effort: on any failure (e.g. no "repo" key) keep the parsed payload.
    # Exception rather than a bare except, so KeyboardInterrupt/SystemExit propagate.
    pass

end = time.time()
print(to_return)
print(f"Time taken: {end - start}")
|
107 |
```
|