Mr-Vicky-01 committed on
Commit
4ab5c71
·
verified ·
1 Parent(s): 67b0869

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +45 -19
README.md CHANGED
@@ -13,25 +13,44 @@ import json
13
  import requests
14
 
15
  def convert_to_json(answer):
16
- # Replace '<pad>' and '</s>' as per the provided logic
 
 
 
 
 
 
 
 
 
 
 
 
17
  answer = answer.replace("<pad>", "").replace("</s>", "")
18
- answer = answer.strip("'") # Remove leading and trailing single quotes
19
-
20
- # Replace 'false' with 'False' and 'true' with 'True' for Python compatibility
21
  answer = answer.replace("false", "False").replace("true", "True")
22
-
23
- # Convert the string to a dictionary
24
  answer_dict = eval(answer)
25
-
26
- # Convert the dictionary to a JSON object
27
  answer_json = json.dumps(answer_dict)
28
-
29
- # Load the JSON object to ensure it's correctly formatted
30
  json_data = json.loads(answer_json)
31
-
32
  return json_data
33
 
34
  def valid_url(url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  valid_list = [
36
  "github.com", "bitbucket.org", "sourceforge.net", "aws.amazon.com",
37
  "dev.azure.com", "gitea.com", "gogs.io", "phabricator.com",
@@ -41,8 +60,10 @@ def valid_url(url):
41
 
42
  if platform in valid_list:
43
  return True
44
- else:
45
- return {'message': 'provide an valid uri for scanning. currently we support pii_scanner, sast_scanner, sca_scanner, iac_scanner, container_scanner, malware_scanner, api_scanner'}
 
 
46
  ```
47
 
48
  ```python
@@ -50,8 +71,8 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
50
  import torch
51
  import time
52
 
53
- tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/NL-JSON-60M")
54
- model = AutoModelForSeq2SeqLM.from_pretrained("AquilaX-AI/NL-JSON-60M")
55
 
56
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
57
 
@@ -67,15 +88,20 @@ inputs = inputs.to(device)
67
  outputs = model.generate(**inputs, max_length=256)
68
  answer = tokenizer.decode(outputs[0])
69
  json_data = convert_to_json(answer)
 
 
70
  try:
71
  valid = valid_url(json_data["repo"])
72
- if valid == True:
73
- print(json_data)
74
  else:
75
- print(valid)
 
 
76
  except:
77
- print(json_data)
78
 
79
  end = time.time()
 
80
  print(f"Time taken: {end - start}")
81
  ```
 
13
  import requests
14
 
15
  def convert_to_json(answer):
16
+ """
17
+ Convert a string representation of a dictionary to a JSON object.
18
+
19
+ This function takes a string representation of a dictionary, cleans it by removing
20
+ specific unwanted tokens and correcting boolean representations, and then converts
21
+ it into a JSON object.
22
+
23
+ Parameters:
24
+ answer (str): The input string representing a dictionary.
25
+
26
+ Returns:
27
+ dict: The JSON object converted from the input string.
28
+ """
29
  answer = answer.replace("<pad>", "").replace("</s>", "")
30
+ answer = answer.strip("'")
 
 
31
  answer = answer.replace("false", "False").replace("true", "True")
 
 
32
  answer_dict = eval(answer)
 
 
33
  answer_json = json.dumps(answer_dict)
 
 
34
  json_data = json.loads(answer_json)
 
35
  return json_data
36
 
37
  def valid_url(url):
38
+ """
39
+ Validate the given URL against a list of supported platforms.
40
+
41
+ This function checks if the provided URL belongs to one of the supported
42
+ platforms for scanning. If the URL is valid, it returns True. Otherwise,
43
+ it returns a message indicating that the URL is not supported and lists the
44
+ available scanners.
45
+
46
+ Parameters:
47
+ url (str): The URL to be validated.
48
+
49
+ Returns:
50
+ bool or dict: Returns True if the URL is valid, otherwise returns a
51
+ dictionary with a message indicating the URL is not supported
52
+ and lists the available scanners.
53
+ """
54
  valid_list = [
55
  "github.com", "bitbucket.org", "sourceforge.net", "aws.amazon.com",
56
  "dev.azure.com", "gitea.com", "gogs.io", "phabricator.com",
 
60
 
61
  if platform in valid_list:
62
  return True
63
+
64
+ return {
65
+ 'message': 'Provide a valid URL for scanning. Currently, we support PII_Scanner, SAST_Scanner, Sac_Scanner (Open_Source_Security), IaC_Scanner, Container_Scanner'
66
+ }
67
  ```
68
 
69
  ```python
 
71
  import torch
72
  import time
73
 
74
+ tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/NL-JSON-Start-Scan")
75
+ model = AutoModelForSeq2SeqLM.from_pretrained("AquilaX-AI/NL-JSON-Start-Scan")
76
 
77
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
78
 
 
88
  outputs = model.generate(**inputs, max_length=256)
89
  answer = tokenizer.decode(outputs[0])
90
  json_data = convert_to_json(answer)
91
+
92
+ to_return = json_data.copy()
93
  try:
94
  valid = valid_url(json_data["repo"])
95
+ if valid != True:
96
+ to_return = valid
97
  else:
98
+ url = re.findall(r'https?://\S+', query)
99
+ to_return['repo'] = url
100
+
101
  except:
102
+ pass
103
 
104
  end = time.time()
105
+ print(to_return)
106
  print(f"Time taken: {end - start}")
107
  ```