Canstralian committed on
Commit
2e7f3da
·
verified ·
1 Parent(s): 11a7a55

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +84 -0
main.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from typing import List, Dict
3
+ import pandas as pd
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+
7
+ app = FastAPI()
8
+
9
+ # Module-level cache for the scraped tools table: None until scrape_kali_tools_endpoint assigns a DataFrame; the read endpoints check for None before using it.
10
+ kali_tools_df = None
11
+
12
def scrape_kali_tools(
    base_url: str = "https://www.kali.org/tools/",
    timeout: float = 10.0,
) -> pd.DataFrame:
    """
    Scrapes the Kali Linux tools documentation page and returns a structured dataset.

    Parameters:
    - base_url: The URL of the Kali Linux tools documentation.
    - timeout: Seconds to wait for the server before giving up.

    Returns:
    - Pandas DataFrame with the columns "name", "description" and "link",
      one row per usable tool entry found on the page. The columns are
      present even when no entry is found.

    Raises:
    - requests.RequestException: if the request fails, times out, or the
      server answers with an HTTP error status.
    """
    # Without an explicit timeout, requests waits indefinitely on a server
    # that accepts the connection but never answers.
    response = requests.get(base_url, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract tool names, descriptions and links
    tools = []
    for tool in soup.select(".tools--index__item"):
        title = tool.select_one(".tools--index__title")
        summary = tool.select_one(".tools--index__description")
        anchor = tool.find("a", href=True)

        # An entry without a name or a link is unusable; skip it instead of
        # letting one malformed item abort the whole scrape.
        if title is None or anchor is None:
            continue

        tools.append({
            "name": title.get_text(strip=True),
            # A missing description is tolerated and stored as an empty string.
            "description": summary.get_text(strip=True) if summary is not None else "",
            "link": anchor["href"],
        })

    # Fixing the columns keeps the schema stable when nothing was scraped.
    return pd.DataFrame(tools, columns=["name", "description", "link"])
36
+
37
@app.get("/scrape_kali_tools/")
def scrape_kali_tools_endpoint():
    """
    Runs the scraper and keeps the resulting table in the module-level cache.

    Returns:
    - A dict whose "message" entry reports how many tools were scraped.
    """
    global kali_tools_df

    scraped = scrape_kali_tools()
    # Publish the new table only after the scrape has completed.
    kali_tools_df = scraped

    tool_count = len(scraped)
    return {"message": f"Scraped {tool_count} tools from Kali Linux documentation."}
48
+
49
@app.get("/get_kali_tools/")
def get_kali_tools(start: int = 0, limit: int = 10) -> List[Dict]:
    """
    Fetches a chunk of the Kali tools dataset.

    Parameters:
    - start: Starting index of the tools to fetch. Negative values are
      treated as 0.
    - limit: Number of tools to return. Negative values are treated as 0.

    Returns:
    - A list of tools with their names, descriptions, and links. The list is
      empty when the requested window lies past the end of the dataset.

    Raises:
    - HTTPException (409): if the dataset has not been scraped yet.
    """
    # Local import: keeps this function self-contained with respect to the
    # module's import block, which only pulls in FastAPI itself.
    from fastapi import HTTPException

    if kali_tools_df is None:
        # Raising keeps the declared List[Dict] return type truthful; returning
        # an error dict here would not match the annotated response shape.
        raise HTTPException(
            status_code=409,
            detail="Dataset not yet scraped. Call /scrape_kali_tools first.",
        )

    # Clamp so a negative offset or size cannot turn into a tail-relative slice.
    first = max(start, 0)
    count = max(limit, 0)
    return kali_tools_df.iloc[first:first + count].to_dict(orient="records")
65
+
66
@app.get("/search_kali_tools/")
def search_kali_tools(keyword: str) -> List[Dict]:
    """
    Searches the Kali tools dataset for a specific keyword.

    The keyword is matched as a literal, case-insensitive substring.

    Parameters:
    - keyword: Keyword to search in tool names or descriptions.

    Returns:
    - A list of tools matching the keyword (empty when nothing matches).

    Raises:
    - HTTPException (409): if the dataset has not been scraped yet.
    """
    # Local import: keeps this function self-contained with respect to the
    # module's import block, which only pulls in FastAPI itself.
    from fastapi import HTTPException

    if kali_tools_df is None:
        # Raising keeps the declared List[Dict] return type truthful; returning
        # an error dict here would not match the annotated response shape.
        raise HTTPException(
            status_code=409,
            detail="Dataset not yet scraped. Call /scrape_kali_tools first.",
        )

    # An empty dataset may have no columns at all; there is nothing to search.
    if kali_tools_df.empty:
        return []

    # regex=False: the keyword comes from the request, so characters such as
    # "+" or "(" must be matched literally rather than compiled as a pattern.
    # na=False: rows with a missing value count as non-matching.
    in_name = kali_tools_df["name"].str.contains(
        keyword, case=False, regex=False, na=False
    )
    in_description = kali_tools_df["description"].str.contains(
        keyword, case=False, regex=False, na=False
    )

    results = kali_tools_df[in_name | in_description]
    return results.to_dict(orient="records")