randydev committed
Commit 4d37bef · verified · 1 Parent(s): e4e3c00

Create libgen.py

Files changed (1)
  1. torrents/libgen.py +120 -0
torrents/libgen.py ADDED
@@ -0,0 +1,120 @@
+import asyncio
+import time
+import aiohttp
+from bs4 import BeautifulSoup
+from helper.asyncioPoliciesFix import decorator_asyncio_fix
+from helper.html_scraper import Scraper
+from constants.base_url import LIBGEN
+from constants.headers import HEADER_AIO
+
+
+class Libgen:
+    def __init__(self):
+        self.BASE_URL = LIBGEN
+        self.LIMIT = None
+
+    @decorator_asyncio_fix
+    async def _individual_scrap(self, session, url, obj, sem):
+        async with sem:
+            try:
+                async with session.get(url, headers=HEADER_AIO) as res:
+                    html = await res.text(encoding="ISO-8859-1")
+                    soup = BeautifulSoup(html, "html.parser")
+                    try:
+                        x = soup.find_all("a")
+                        for a in x:
+                            if a.text == "One-filetorrent":
+                                if a["href"] != "#":
+                                    obj["torrent"] = self.BASE_URL + a["href"]
+                        poster = soup.find_all("img")[0]
+
+                        if poster:
+                            obj["poster"] = "http://library.lol" + poster["src"]
+                    except:
+                        ...
+            except:
+                return None
+
+    async def _get_torrent(self, result, session, urls):
+        tasks = []
+        sem = asyncio.Semaphore(3)
+        for idx, url in enumerate(urls):
+            for obj in result["data"]:
+                if obj["url"] == url:
+                    task = asyncio.create_task(
+                        self._individual_scrap(session, url, result["data"][idx], sem)
+                    )
+                    tasks.append(task)
+        await asyncio.gather(*tasks)
+        return result
+
+    def _parser(self, htmls):
+        try:
+            for html in htmls:
+                soup = BeautifulSoup(html, "html.parser")
+                list_of_urls = []
+                my_dict = {"data": []}
+                trs = soup.select("[valign=top]")
+                for tr in trs[1:]:
+                    td = tr.find_all("td")
+                    id = td[0].text
+                    authors = []
+                    author = td[1].find_all("a")
+                    for a in author:
+                        authors.append(a.text.strip())
+                    name_and_url = td[2].find("a")
+                    name = name_and_url.text
+                    url = self.BASE_URL + "/" + name_and_url["href"]
+                    list_of_urls.append(url)
+                    publisher = td[3].text
+                    year = td[4].text
+                    pages = None
+                    try:
+                        pages = td[5].text
+                    except:
+                        ...
+                    language = td[6].text
+                    size = td[7].text
+                    extension = td[8].text
+
+                    my_dict["data"].append(
+                        {
+                            "id": id,
+                            "authors": authors,
+                            "name": name,
+                            "publisher": publisher,
+                            "year": year,
+                            "pages": pages,
+                            "language": language,
+                            "size": size,
+                            "extension": extension,
+                            "url": url,
+                        }
+                    )
+                    if len(my_dict["data"]) == self.LIMIT:
+                        break
+                return my_dict, list_of_urls
+        except:
+            return None, None
+
+    async def search(self, query, page, limit):
+        async with aiohttp.ClientSession() as session:
+            start_time = time.time()
+            self.LIMIT = limit
+            url = (
+                self.BASE_URL
+                + "/search.php?req={}&lg_topic=libgen&open=0&view=simple&res=100&phrase=1&column=def".format(
+                    query
+                )
+            )
+            return await self.parser_result(start_time, url, session)
+
+    async def parser_result(self, start_time, url, session):
+        htmls = await Scraper().get_all_results(session, url)
+        result, urls = self._parser(htmls)
+        if result is not None:
+            results = await self._get_torrent(result, session, urls)
+            results["time"] = time.time() - start_time
+            results["total"] = len(results["data"])
+            return results
+        return result
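
For context, a minimal usage sketch of the new class. This snippet is illustrative only and not part of the commit; it assumes the added file lives at torrents/libgen.py and that the repo's helper and constants modules it imports resolve from the caller's environment.

# Illustrative usage sketch — not part of this commit.
# Assumes torrents/libgen.py and its helper/constants imports are importable.
import asyncio

from torrents.libgen import Libgen


async def main():
    libgen = Libgen()
    # search() returns a dict with "data", "time" and "total" keys,
    # or None if parsing failed; the "page" argument is accepted but unused.
    results = await libgen.search(query="python", page=1, limit=10)
    if results:
        print(results["total"], "results in", round(results["time"], 2), "s")
        for item in results["data"]:
            print(item["name"], item.get("torrent", "no torrent link"))


if __name__ == "__main__":
    asyncio.run(main())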