Update web_engine.py
web_engine.py  CHANGED  (+68 −51)
@@ -1,51 +1,68 @@

The file is rewritten in full: all 51 lines of the previous version are removed and replaced by the 68 lines below.
```python
import aiohttp
import asyncio
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs

# Fetch a page asynchronously; return an empty string on any error or timeout
async def fetch(session, url):
    try:
        async with session.get(url, timeout=10) as response:
            return await response.text()
    except Exception:
        return ""

# Fetch a page and extract the visible text of its <body>
async def get_page_text(session, url):
    html = await fetch(session, url)
    if not html:
        return "No text found"
    soup = BeautifulSoup(html, 'html.parser')
    body = soup.find('body')
    if body:
        return body.get_text(separator='\n', strip=True)
    return "No text found"

# Search Google for the query and collect the result URLs
async def search_info(prompt):
    query = prompt.replace(' ', '+')
    search_url = f"https://www.google.com/search?q={query}"

    async with aiohttp.ClientSession() as session:
        html = await fetch(session, search_url)
        if not html:
            return []

        soup = BeautifulSoup(html, 'html.parser')
        links = []
        # Result titles are <h3> tags nested inside <a href="/url?q=..."> anchors;
        # the real destination is the 'q' query parameter of that redirect link
        for item in soup.find_all('h3'):
            parent = item.find_parent('a')
            if parent and 'href' in parent.attrs:
                link = parent['href']
                parsed_url = urlparse(link)
                if parsed_url.path == '/url':
                    query_params = parse_qs(parsed_url.query)
                    if 'q' in query_params:
                        links.append(query_params['q'][0])

        return links

# Main coroutine: search, then download the top results concurrently
async def main():
    prompt = input("Enter a search query: ")
    results = await search_info(prompt)

    if not results:
        print("Nothing found.")
        return

    async with aiohttp.ClientSession() as session:
        tasks = [get_page_text(session, link) for link in results[:5]]  # cap at 5 links for speed
        texts = await asyncio.gather(*tasks)

    for link, text in zip(results[:5], texts):
        print(f"Link: {link}")
        print(f"Text: {text}\n")

# Program entry point
if __name__ == "__main__":
    asyncio.run(main())
```
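A quick way to exercise the new module is a small smoke test around `search_info` alone. This is a minimal sketch, assuming `web_engine.py` is importable from the working directory and the machine has network access; the query string is just a placeholder, and Google may serve a consent page or block unauthenticated scraping, in which case the list comes back empty:

```python
# Smoke test for web_engine.search_info — a sketch, not part of the commit.
# Assumes web_engine.py sits next to this script; the query is a placeholder.
import asyncio

from web_engine import search_info

async def demo():
    links = await search_info("aiohttp tutorial")
    print(f"Found {len(links)} links")
    for link in links[:3]:
        print(link)

asyncio.run(demo())
```

Note that `fetch` and `get_page_text` take the `ClientSession` as a parameter rather than opening their own: a single shared session lets aiohttp pool connections across the five concurrent downloads in `main`.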