File size: 3,189 Bytes
5ac1adf 67b0869 5ac1adf e59179c 4ab5c71 e59179c 4ab5c71 e59179c 4ab5c71 e59179c 4ab5c71 e59179c 4ab5c71 e59179c d4ff9a4 e59179c 4ab5c71 e59179c 4ab5c71 e59179c 4ab5c71 e59179c 4ab5c71 e59179c 4ab5c71 e59179c 67b0869 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
---
library_name: transformers
license: apache-2.0
---
### Requirements
```bash
pip install transformers[torch]
```
### Inference
```python
import json
import requests
def convert_to_json(answer):
    """
    Convert the model's decoded output string into a JSON-compatible object.

    The decoded text is cleaned of the ``<pad>`` and ``</s>`` special tokens,
    surrounding whitespace and wrapping single quotes, and then parsed
    *without* executing it:

    1. It is first parsed as strict JSON (double-quoted strings,
       ``true``/``false``/``null``).
    2. If that fails, it is parsed as a Python literal (e.g. a single-quoted
       dict). Bare ``true``/``false``/``null`` names are mapped to
       ``True``/``False``/``None``; text inside string values is left
       untouched.

    Parameters:
    answer (str): The decoded model output representing a dictionary.

    Returns:
    dict: The parsed object (normally a dict) made of JSON-compatible values.

    Raises:
    SyntaxError: If the cleaned text is neither JSON nor a Python expression.
    ValueError: If the text is an expression that is not a plain literal
    (function calls, attribute access, unknown names, ...).
    """
    # Local import: the snippet's import block only provides `json`.
    import ast

    cleaned = answer.replace("<pad>", "").replace("</s>", "")
    # Strip whitespace first so wrapping quotes are removed even when the
    # tokenizer leaves a leading space after the special tokens.
    cleaned = cleaned.strip().strip("'").strip()

    # Preferred path: the output is already valid JSON.
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        pass

    # Fallback path: Python-literal syntax. The model output is untrusted
    # text, so it must never be passed to eval(); literal_eval only accepts
    # literals and rejects anything executable.
    json_constants = {"true": True, "false": False, "null": None}

    class _JsonConstants(ast.NodeTransformer):
        """Rewrite bare JSON constant names into Python constants."""

        def visit_Name(self, node):
            if node.id in json_constants:
                replacement = ast.Constant(value=json_constants[node.id])
                return ast.copy_location(replacement, node)
            return node

    tree = _JsonConstants().visit(ast.parse(cleaned, mode="eval"))
    parsed = ast.literal_eval(tree)

    # Round-trip through JSON so the result only contains JSON-compatible
    # types (e.g. tuples become lists), as the original implementation did.
    return json.loads(json.dumps(parsed))
def valid_url(url):
    """
    Validate the given URL against a list of supported platforms.

    The host name is extracted with ``urllib.parse.urlsplit`` and compared
    against the supported platforms. The comparison ignores letter case,
    a port, user-info and a leading ``www.``. A URL given without a scheme
    (e.g. ``github.com/org/repo``) is accepted as well. Input that cannot
    be parsed never raises; it is reported as unsupported.

    Parameters:
    url (str): The URL to be validated.

    Returns:
    bool or dict: Returns True if the URL is valid, otherwise returns a
    dictionary with a message indicating the URL is not supported
    and lists the available scanners.
    """
    # Local import: the snippet's import block does not provide urllib.
    from urllib.parse import urlsplit

    supported_hosts = {
        "github.com", "bitbucket.org", "sourceforge.net", "aws.amazon.com",
        "dev.azure.com", "gitea.com", "gogs.io", "phabricator.com",
        "gitkraken.com", "beanstalkapp.com", "gitlab.com",
    }
    unsupported = {
        'message': 'Provide a valid URL for scanning. Currently, we support PII_Scanner, SAST_Scanner, Sac_Scanner (Open_Source_Security), IaC_Scanner, Container_Scanner'
    }

    if not isinstance(url, str):
        return unsupported

    candidate = url.strip()
    try:
        parts = urlsplit(candidate)
        if not parts.scheme and not parts.netloc:
            # No scheme: urlsplit puts everything in the path, so re-parse
            # the text as a network location.
            parts = urlsplit("//" + candidate.lstrip("/"))
        # .hostname is lower-cased and excludes user-info and port.
        host = parts.hostname or ""
    except ValueError:
        # urlsplit rejects malformed authorities (e.g. a broken IPv6 literal).
        return unsupported

    if host.startswith("www."):
        host = host[len("www."):]

    if host in supported_hosts:
        return True
    return unsupported
```
```python
import re
import time

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Load the tokenizer and model once and place the model on the best
# available device. This happens before the timer starts, so the reported
# time does not include the one-off device transfer.
tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/NL-JSON-Start-Scan")
model = AutoModelForSeq2SeqLM.from_pretrained("AquilaX-AI/NL-JSON-Start-Scan")
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Replace YOUR_QUERY with your request, e.g.:
#   scan this https://github.com/mr-vicky-01/educational-assitant every week using pii and sast scan
# Commas are removed from the request; the prompt sent to the model is the
# lower-cased request, while `user_query` keeps the original letter case.
user_query = "YOUR_QUERY".replace(",", "")
query = "Translate the following text to JSON: " + user_query.lower()

start = time.time()
inputs = tokenizer(query, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_length=256)
answer = tokenizer.decode(outputs[0])
json_data = convert_to_json(answer)
to_return = json_data.copy()

try:
    valid = valid_url(json_data["repo"])
except (KeyError, TypeError, IndexError, AttributeError) as err:
    # Best effort: without a usable "repo" entry the model output is
    # returned unchanged, but the reason is reported instead of hidden.
    print(f"Skipping repo validation: {err!r}")
else:
    if valid is not True:
        # valid_url returned its "unsupported URL" message dictionary.
        to_return = valid
    else:
        # Take the URL(s) from the original-case request rather than the
        # lower-cased prompt, so case-sensitive paths are preserved.
        url = re.findall(r'https?://\S+', user_query, flags=re.IGNORECASE)
        if url:
            # Keep the model's value when no URL can be found in the request.
            to_return['repo'] = url

end = time.time()
print(to_return)
print(f"Time taken: {end - start}")
```