File size: 1,727 Bytes
4f8ad24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
from functools import lru_cache
from typing import Iterator, Tuple, Union
from urllib.parse import quote_plus, urljoin

from hbutils.system import urlsplit

from .web import WebDataSource
from ..utils import get_requests_session, srequest


class Huashi6Source(WebDataSource):
    __img_site_url__ = 'https://img2.huashi6.com'

    def __init__(self, word: str,
                 group_name: str = 'huashi6', download_silent: bool = True):
        WebDataSource.__init__(self, group_name, get_requests_session(), download_silent)
        self.word = word

    @classmethod
    @lru_cache()
    def _get_img_site_url(cls):
        return cls.__img_site_url__

    def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]:
        page = 1

        while True:
            resp = srequest(self.session, 'POST', "https://rt.huashi6.com/search/all", data={
                'word': self.word,
                'index': str(page),
            }, headers={
                "referrer": f"https://www.huashi6.com/search?searchText={quote_plus(self.word)}&p={page}",
            })
            raw = resp.json()['data']
            if 'works' not in raw or not raw['works']:
                break

            for post in raw['works']:
                url = urljoin(self._get_img_site_url(), post['coverImage']['path'])
                _, ext_name = os.path.splitext(urlsplit(url).filename)
                filename = f'{self.group_name}_{post["id"]}{ext_name}'
                meta = {
                    'huashi6': post,
                    'group_id': f'{self.group_name}_{post["id"]}',
                    'filename': filename,
                }
                yield post['id'], url, meta

            page += 1