H Kevin Hu committed on
Commit
4a00dc8
·
1 Parent(s): 3060a10

Add Google and Bing components (#1737)

Browse files

### What problem does this PR solve?

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Kevin Hu <[email protected]>

graph/component/__init__.py CHANGED
@@ -14,6 +14,8 @@ from .duckduckgo import DuckDuckGo, DuckDuckGoParam
14
  from .wikipedia import Wikipedia, WikipediaParam
15
  from .pubmed import PubMed, PubMedParam
16
  from .arxiv import ArXiv, ArXivParam
 
 
17
 
18
 
19
  def component_class(class_name):
 
14
  from .wikipedia import Wikipedia, WikipediaParam
15
  from .pubmed import PubMed, PubMedParam
16
  from .arxiv import ArXiv, ArXivParam
17
+ from .google import Google, GoogleParam
18
+ from .bing import Bing, BingParam
19
 
20
 
21
  def component_class(class_name):
graph/component/bing.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ from abc import ABC
17
+ import requests
18
+ import pandas as pd
19
+ from graph.settings import DEBUG
20
+ from graph.component.base import ComponentBase, ComponentParamBase
21
+
22
+
23
class BingParam(ComponentParamBase):
    """
    Parameter holder for the Bing search component.

    Attributes set by ``__init__``:
        top_n:    default 10.
        channel:  default "Webpages"; ``check`` accepts "Webpages" or "News".
        api_key:  default placeholder "YOUR_ACCESS_KEY".
        country:  default "CN"; must be one of the codes listed in ``check``.
        language: default "en"; must be one of the codes listed in ``check``.
    """

    def __init__(self):
        super().__init__()
        self.top_n = 10
        self.channel = "Webpages"
        self.api_key = "YOUR_ACCESS_KEY"
        self.country = "CN"
        self.language = "en"

    def check(self):
        """
        Validate every parameter through the inherited ``check_*`` helpers.

        The helpers are called in a fixed order: top_n, channel, api_key,
        country, language.
        """
        allowed_channels = ["Webpages", "News"]
        allowed_countries = [
            'AR', 'AU', 'AT', 'BE', 'BR', 'CA', 'CL', 'DK', 'FI', 'FR', 'DE', 'HK', 'IN', 'ID',
            'IT', 'JP', 'KR', 'MY', 'MX', 'NL', 'NZ', 'NO', 'CN', 'PL', 'PT', 'PH', 'RU', 'SA',
            'ZA', 'ES', 'SE', 'CH', 'TW', 'TR', 'GB', 'US',
        ]
        # NOTE(review): 'ns', 'nt', 'gb' and 'br' look like truncated forms of
        # longer language tags -- confirm against the Bing language code list.
        allowed_languages = [
            'ar', 'eu', 'bn', 'bg', 'ca', 'ns', 'nt', 'hr', 'cs', 'da', 'nl', 'en', 'gb', 'et',
            'fi', 'fr', 'gl', 'de', 'gu', 'he', 'hi', 'hu', 'is', 'it', 'jp', 'kn', 'ko', 'lv',
            'lt', 'ms', 'ml', 'mr', 'nb', 'pl', 'br', 'pt', 'pa', 'ro', 'ru', 'sr', 'sk', 'sl',
            'es', 'sv', 'ta', 'te', 'th', 'tr', 'uk', 'vi',
        ]

        self.check_positive_integer(self.top_n, "Top N")
        self.check_valid_value(self.channel, "Bing Web Search or Bing News", allowed_channels)
        self.check_empty(self.api_key, "Bing subscription key")
        self.check_valid_value(self.country, "Bing Country", allowed_countries)
        self.check_valid_value(self.language, "Bing Languages", allowed_languages)
49
+
50
+
51
class Bing(ComponentBase, ABC):
    """
    Component that queries the Bing Web Search or Bing News Search API.

    The query text is built from the component input; each hit is rendered
    as an HTML anchor followed by its snippet/description.
    """
    component_name = "Bing"

    def _run(self, history, **kwargs):
        """
        Run one Bing search and return the hits.

        Args:
            history: not used by this component.
            **kwargs: not used by this component.

        Returns:
            A pandas DataFrame with a single "content" column when at least
            one hit is found; otherwise whatever ``Bing.be_output`` returns
            for an empty string or for an "**ERROR**: ..." message.
        """
        ans = self.get_input()
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return Bing.be_output("")

        # (endpoint, key of the answer object, key of the per-hit text)
        channels = {
            "Webpages": ("https://api.bing.microsoft.com/v7.0/search", "webPages", "snippet"),
            "News": ("https://api.bing.microsoft.com/v7.0/news/search", "news", "description"),
        }
        channel = self._param.channel
        if channel not in channels:
            # Previously this fell through with `bing_res` unbound and crashed.
            return Bing.be_output("**ERROR**: Unsupported Bing channel: " + str(channel))
        endpoint, answer_key, text_key = channels[channel]

        try:
            headers = {"Ocp-Apim-Subscription-Key": self._param.api_key, 'Accept-Language': self._param.language}
            params = {"q": ans, "textDecorations": True, "textFormat": "HTML", "cc": self._param.country,
                      "answerCount": 1, "promote": channel,
                      # Ask the API for no more hits than the configured limit.
                      "count": self._param.top_n}
            # A timeout keeps a stalled connection from blocking the component forever.
            response = requests.get(endpoint, headers=headers, params=params, timeout=30)
            response.raise_for_status()
            search_results = response.json()

            # /search nests hits under "webPages"; /news/search returns them in
            # a top-level "value". Accept both shapes, and treat a missing
            # answer as "no results" rather than an error.
            answer = search_results.get(answer_key) or search_results
            hits = (answer.get("value") or [])[:self._param.top_n]
            bing_res = [{"content": '<a href="' + i.get("url", "") + '">' + i.get("name", "") + '</a> ' +
                                    i.get(text_key, "")} for i in hits]
        except Exception as e:
            # Boundary handler: report any request/parsing failure as component output.
            return Bing.be_output("**ERROR**: " + str(e))

        if not bing_res:
            return Bing.be_output("")

        df = pd.DataFrame(bing_res)
        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
        return df
graph/component/google.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #
2
+ # Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ #
16
+ from abc import ABC
17
+ from serpapi import GoogleSearch
18
+ import pandas as pd
19
+ from graph.settings import DEBUG
20
+ from graph.component.base import ComponentBase, ComponentParamBase
21
+
22
+
23
class GoogleParam(ComponentParamBase):
    """
    Parameter holder for the Google (SerpApi) search component.

    Attributes set by ``__init__``:
        top_n:    default 10.
        api_key:  default placeholder "xxx".
        country:  default "cn"; must be one of the codes listed in ``check``.
        language: default "en"; must be one of the codes listed in ``check``.
    """

    def __init__(self):
        super().__init__()
        self.top_n = 10
        self.api_key = "xxx"
        self.country = "cn"
        self.language = "en"

    def check(self):
        """
        Validate every parameter through the inherited ``check_*`` helpers.

        The helpers are called in a fixed order: top_n, api_key, country,
        language.
        """
        allowed_countries = [
            'af', 'al', 'dz', 'as', 'ad', 'ao', 'ai', 'aq', 'ag', 'ar', 'am', 'aw', 'au', 'at',
            'az', 'bs', 'bh', 'bd', 'bb', 'by', 'be', 'bz', 'bj', 'bm', 'bt', 'bo', 'ba', 'bw',
            'bv', 'br', 'io', 'bn', 'bg', 'bf', 'bi', 'kh', 'cm', 'ca', 'cv', 'ky', 'cf', 'td',
            'cl', 'cn', 'cx', 'cc', 'co', 'km', 'cg', 'cd', 'ck', 'cr', 'ci', 'hr', 'cu', 'cy',
            'cz', 'dk', 'dj', 'dm', 'do', 'ec', 'eg', 'sv', 'gq', 'er', 'ee', 'et', 'fk', 'fo',
            'fj', 'fi', 'fr', 'gf', 'pf', 'tf', 'ga', 'gm', 'ge', 'de', 'gh', 'gi', 'gr', 'gl',
            'gd', 'gp', 'gu', 'gt', 'gn', 'gw', 'gy', 'ht', 'hm', 'va', 'hn', 'hk', 'hu', 'is',
            'in', 'id', 'ir', 'iq', 'ie', 'il', 'it', 'jm', 'jp', 'jo', 'kz', 'ke', 'ki', 'kp',
            'kr', 'kw', 'kg', 'la', 'lv', 'lb', 'ls', 'lr', 'ly', 'li', 'lt', 'lu', 'mo', 'mk',
            'mg', 'mw', 'my', 'mv', 'ml', 'mt', 'mh', 'mq', 'mr', 'mu', 'yt', 'mx', 'fm', 'md',
            'mc', 'mn', 'ms', 'ma', 'mz', 'mm', 'na', 'nr', 'np', 'nl', 'an', 'nc', 'nz', 'ni',
            'ne', 'ng', 'nu', 'nf', 'mp', 'no', 'om', 'pk', 'pw', 'ps', 'pa', 'pg', 'py', 'pe',
            'ph', 'pn', 'pl', 'pt', 'pr', 'qa', 're', 'ro', 'ru', 'rw', 'sh', 'kn', 'lc', 'pm',
            'vc', 'ws', 'sm', 'st', 'sa', 'sn', 'rs', 'sc', 'sl', 'sg', 'sk', 'si', 'sb', 'so',
            'za', 'gs', 'es', 'lk', 'sd', 'sr', 'sj', 'sz', 'se', 'ch', 'sy', 'tw', 'tj', 'tz',
            'th', 'tl', 'tg', 'tk', 'to', 'tt', 'tn', 'tr', 'tm', 'tc', 'tv', 'ug', 'ua', 'ae',
            'uk', 'gb', 'us', 'um', 'uy', 'uz', 'vu', 've', 'vn', 'vg', 'vi', 'wf', 'eh', 'ye',
            'zm', 'zw',
        ]
        allowed_languages = [
            'af', 'ak', 'sq', 'ws', 'am', 'ar', 'hy', 'az', 'eu', 'be', 'bem', 'bn', 'bh',
            'xx-bork', 'bs', 'br', 'bg', 'bt', 'km', 'ca', 'chr', 'ny', 'zh-cn', 'zh-tw', 'co',
            'hr', 'cs', 'da', 'nl', 'xx-elmer', 'en', 'eo', 'et', 'ee', 'fo', 'tl', 'fi', 'fr',
            'fy', 'gaa', 'gl', 'ka', 'de', 'el', 'kl', 'gn', 'gu', 'xx-hacker', 'ht', 'ha', 'haw',
            'iw', 'hi', 'hu', 'is', 'ig', 'id', 'ia', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'rw',
            'rn', 'xx-klingon', 'kg', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt',
            'loz', 'lg', 'ach', 'mk', 'mg', 'ms', 'ml', 'mt', 'mv', 'mi', 'mr', 'mfe', 'mo', 'mn',
            'sr-me', 'my', 'ne', 'pcm', 'nso', 'no', 'nn', 'oc', 'or', 'om', 'ps', 'fa',
            'xx-pirate', 'pl', 'pt', 'pt-br', 'pt-pt', 'pa', 'qu', 'ro', 'rm', 'nyn', 'ru', 'gd',
            'sr', 'sh', 'st', 'tn', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'es-419', 'su',
            'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'to', 'lua', 'tum', 'tr', 'tk', 'tw',
            'ug', 'uk', 'ur', 'uz', 'vu', 'vi', 'cy', 'wo', 'xh', 'yi', 'yo', 'zu',
        ]

        self.check_positive_integer(self.top_n, "Top N")
        self.check_empty(self.api_key, "SerpApi API key")
        self.check_valid_value(self.country, "Google Country", allowed_countries)
        self.check_valid_value(self.language, "Google languages", allowed_languages)
71
+
72
+
73
class Google(ComponentBase, ABC):
    """
    Component that runs a Google search through SerpApi's ``GoogleSearch``.

    The query text is built from the component input; each organic result is
    rendered as an HTML anchor followed by its snippet.
    """
    component_name = "Google"

    def _run(self, history, **kwargs):
        """
        Run one Google search and return the organic results.

        Args:
            history: not used by this component.
            **kwargs: not used by this component.

        Returns:
            A pandas DataFrame with a single "content" column when at least
            one result is found; otherwise whatever ``Google.be_output``
            returns for an empty string or for an "**ERROR**: ..." message.
        """
        ans = self.get_input()
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return Google.be_output("")

        try:
            client = GoogleSearch(
                {"engine": "google", "q": ans, "api_key": self._param.api_key, "gl": self._param.country,
                 "hl": self._param.language, "num": self._param.top_n})
            results = client.get_dict()
            if "organic_results" not in results:
                # Prefer the service's own explanation when it supplies one;
                # fall back to the historical generic message otherwise.
                return Google.be_output(
                    "**ERROR**: " + str(results.get("error", "Existing Unavailable Parameters!")))

            # Cap locally as well: "num" is only a request hint.
            hits = results["organic_results"][:self._param.top_n]
            # Use .get so one result lacking a field (e.g. no snippet) does
            # not discard every other result.
            google_res = [{"content": '<a href="' + i.get("link", "") + '">' + i.get("title", "") + '</a> ' +
                                      i.get("snippet", "")} for i in hits]
        except Exception:
            # Boundary handler: any client/parsing failure becomes component output.
            return Google.be_output("**ERROR**: Existing Unavailable Parameters!")

        if not google_res:
            return Google.be_output("")

        df = pd.DataFrame(google_res)
        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
        return df
requirements.txt CHANGED
@@ -23,6 +23,7 @@ Flask==3.0.3
23
  Flask_Cors==4.0.1
24
  Flask_Login==0.6.3
25
  flask_session==0.8.0
 
26
  groq==0.9.0
27
  hanziconv==0.3.2
28
  html_text==0.6.2
 
23
  Flask_Cors==4.0.1
24
  Flask_Login==0.6.3
25
  flask_session==0.8.0
26
+ google_search_results==2.4.2
27
  groq==0.9.0
28
  hanziconv==0.3.2
29
  html_text==0.6.2
requirements_arm.txt CHANGED
@@ -154,3 +154,4 @@ wikipedia==1.4.0
154
  Bio==1.7.1
155
  arxiv==2.1.3
156
  pypdf==4.3.0
 
 
154
  Bio==1.7.1
155
  arxiv==2.1.3
156
  pypdf==4.3.0
157
+ google_search_results==2.4.2
requirements_dev.txt CHANGED
@@ -139,3 +139,4 @@ wikipedia==1.4.0
139
  Bio==1.7.1
140
  arxiv==2.1.3
141
  pypdf==4.3.0
 
 
139
  Bio==1.7.1
140
  arxiv==2.1.3
141
  pypdf==4.3.0
142
+ google_search_results==2.4.2