|
--- |
|
library_name: transformers |
|
tags: [] |
|
--- |
|
### Requirements |
|
```bash |
|
pip install "transformers[torch]"
|
``` |
|
|
|
### Inference |
|
```python |
|
import json |
|
import requests |
|
|
|
def _parse_python_literal(text):
    """Safely evaluate *text* as a Python/JSON-style literal.

    Only literals (dicts, lists, tuples, strings, numbers, booleans, None)
    are accepted.  The bare identifiers ``true``, ``false`` and ``null`` are
    mapped to ``True``, ``False`` and ``None`` at the syntax-tree level, so
    the same words appearing *inside* string values (e.g. a repository URL
    containing "true") are left untouched.

    Raises:
        SyntaxError: if *text* is not a valid expression.
        ValueError: if *text* contains anything other than literals.
    """
    import ast  # local import keeps this snippet self-contained

    json_names = {"true": True, "false": False, "null": None}

    class _JsonNames(ast.NodeTransformer):
        def visit_Name(self, node):
            if node.id in json_names:
                return ast.copy_location(
                    ast.Constant(value=json_names[node.id]), node
                )
            return node

    tree = _JsonNames().visit(ast.parse(text, mode="eval"))
    # SECURITY: the model output is untrusted text.  The previous version
    # passed it to eval(), which executes arbitrary code; literal_eval only
    # builds plain data and rejects calls, attribute access, etc.
    return ast.literal_eval(tree)


def convert_to_json(answer):
    """Convert the decoded model output into JSON-compatible Python data.

    Args:
        answer: Raw string returned by ``tokenizer.decode``; may contain the
            ``<pad>`` / ``</s>`` special tokens and surrounding single quotes.

    Returns:
        The parsed data (normally a dict) containing only JSON-compatible
        types.

    Raises:
        SyntaxError, ValueError: if the text is neither valid JSON nor a
            Python-style literal.
    """
    # Drop the special tokens, then surrounding whitespace and single quotes.
    cleaned = answer.replace("<pad>", "").replace("</s>", "").strip()
    cleaned = cleaned.strip("'").strip()

    # Strict JSON (double quotes, lowercase true/false) needs no rewriting.
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError:
        parsed = _parse_python_literal(cleaned)

    # Round-trip through JSON so the result is guaranteed serialisable and
    # normalised (e.g. tuples become lists), as the original code did.
    return json.loads(json.dumps(parsed))
|
|
|
def valid_url(url):
    """Check whether *url* points at a supported source-hosting platform.

    The host name is extracted with ``urllib.parse.urlsplit`` so that a
    missing scheme, a port, user-info, upper-case letters, or a leading
    ``www.`` do not break the comparison.

    Args:
        url: Repository URL, with or without a scheme.

    Returns:
        ``True`` when the host is in the supported list; otherwise a dict
        with a single ``'message'`` key describing the problem.
    """
    from urllib.parse import urlsplit  # local import keeps the snippet self-contained

    supported_hosts = frozenset({
        "github.com", "bitbucket.org", "sourceforge.net", "aws.amazon.com",
        "dev.azure.com", "gitea.com", "gogs.io", "phabricator.com",
        "gitkraken.com", "beanstalkapp.com", "gitlab.com",
    })
    rejection = {'message': 'Provide a valid URL for scanning. Currently, we support PII_Scanner, SAST_Scanner, Sac_Scanner (Open_Source_Security), IaC_Scanner, Container_Scanner'}

    if not isinstance(url, str) or not url.strip():
        return rejection

    candidate = url.strip()
    # urlsplit only recognises a network location after "//"; add it for
    # scheme-less input such as "github.com/owner/repo".
    if "://" not in candidate and not candidate.startswith("//"):
        candidate = "//" + candidate

    try:
        host = urlsplit(candidate).hostname or ""
    except ValueError:  # e.g. malformed IPv6 literal
        return rejection

    host = host.lower().rstrip(".")
    if host.startswith("www."):
        host = host[len("www."):]

    if host in supported_hosts:
        return True
    return rejection
|
``` |
|
|
|
```python |
|
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM |
|
import torch |
|
import time |
|
|
|
tokenizer = AutoTokenizer.from_pretrained("AquilaX-AI/NL-JSON-60M")
model = AutoModelForSeq2SeqLM.from_pretrained("AquilaX-AI/NL-JSON-60M")

# Use the first GPU when available; moving the model is one-time setup, so it
# is done here, outside the timed region below.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)

# Change YOUR_QUERY eg: can this https://github.com/mr-vicky-01/educational-assitant on every week using pii and sast scan
# The query is lower-cased and commas are removed before it is sent to the model.
query = "Translate the following text to JSON: " + "YOUR_QUERY".lower()
query = query.replace(",", "")

start = time.perf_counter()

inputs = tokenizer(query, return_tensors="pt").to(device)
outputs = model.generate(**inputs, max_length=256)
answer = tokenizer.decode(outputs[0])
json_data = convert_to_json(answer)

try:
    valid = valid_url(json_data["repo"])
except (KeyError, TypeError, IndexError, AttributeError):
    # No usable "repo" entry (key missing, output is not a dict, or the value
    # is not a URL string that can be checked): show the parsed output as-is.
    print(json_data)
else:
    # valid_url returns True for a supported host, otherwise a message dict.
    print(json_data if valid is True else valid)

end = time.perf_counter()
print(f"Time taken: {end - start}")
|
``` |
|
|