H committed on
Commit
fd69a6c
·
1 Parent(s): 86d0fad

Fix baidusearch and duckduckgosearch (#1488)

Browse files

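### What problem does this PR solve?

The Baidu and DuckDuckGo search components now return their results as a pandas DataFrame with a single `content` column (one row per search hit) instead of passing a list of HTML strings to `be_output`. `DuckDuckGoSearch` also no longer calls `Baidu.be_output` in its empty-input branch; it calls its own `DuckDuckGoSearch.be_output`.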

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)

graph/component/baidu.py CHANGED
@@ -53,10 +53,11 @@ class Baidu(ComponentBase, ABC):
53
  url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
54
  title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
55
  body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
56
- baidu_res = [re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body) for url, title, body
57
- in zip(url_res, title_res, body_res)]
58
  del body_res, url_res, title_res
59
 
60
- print(baidu_res, ":::::::::::::::::::::::::::::::::")
61
- return Baidu.be_output(baidu_res)
 
 
62
 
 
53
  url_res = re.findall(r"'url': \\\"(.*?)\\\"}", response.text)
54
  title_res = re.findall(r"'title': \\\"(.*?)\\\",\\n", response.text)
55
  body_res = re.findall(r"\"contentText\":\"(.*?)\"", response.text)
56
+ baidu_res = [{"content": re.sub('<em>|</em>', '', '<a href="' + url + '">' + title + '</a> ' + body)} for url, title, body in zip(url_res, title_res, body_res)]
 
57
  del body_res, url_res, title_res
58
 
59
+ df = pd.DataFrame(baidu_res)
60
+ print(df, ":::::::::::::::::::::::::::::::::")
61
+
62
+ return df
63
 
graph/component/duckduckgosearch.py CHANGED
@@ -44,18 +44,19 @@ class DuckDuckGoSearch(ComponentBase, ABC):
44
  ans = self.get_input()
45
  ans = " - ".join(ans["content"]) if "content" in ans else ""
46
  if not ans:
47
- return Baidu.be_output(self._param.no)
48
 
49
  if self.channel == "text":
50
  with DDGS() as ddgs:
51
  # {'title': '', 'href': '', 'body': ''}
52
- duck_res = ['<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"] for i in
53
  ddgs.text(ans, max_results=self._param.top_n)]
54
  elif self.channel == "news":
55
  with DDGS() as ddgs:
56
  # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
57
- duck_res = ['<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"] for i in
58
  ddgs.news(ans, max_results=self._param.top_n)]
59
 
60
- print(duck_res, ":::::::::::::::::::::::::::::::::")
61
- return DuckDuckGoSearch.be_output(duck_res)
 
 
44
  ans = self.get_input()
45
  ans = " - ".join(ans["content"]) if "content" in ans else ""
46
  if not ans:
47
+ return DuckDuckGoSearch.be_output(self._param.no)
48
 
49
  if self.channel == "text":
50
  with DDGS() as ddgs:
51
  # {'title': '', 'href': '', 'body': ''}
52
+ duck_res = [{"content": '<a href="' + i["href"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
53
  ddgs.text(ans, max_results=self._param.top_n)]
54
  elif self.channel == "news":
55
  with DDGS() as ddgs:
56
  # {'date': '', 'title': '', 'body': '', 'url': '', 'image': '', 'source': ''}
57
+ duck_res = [{"content": '<a href="' + i["url"] + '">' + i["title"] + '</a> ' + i["body"]} for i in
58
  ddgs.news(ans, max_results=self._param.top_n)]
59
 
60
+ df = pd.DataFrame(duck_res)
61
+ print(df, ":::::::::::::::::::::::::::::::::")
62
+ return df