H
Kevin Hu
committed on
Commit
·
4a00dc8
1
Parent(s):
3060a10
Add component google,Bing (#1737)
Browse files
### What problem does this PR solve?
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
---------
Co-authored-by: Kevin Hu <[email protected]>
- graph/component/__init__.py +2 -0
- graph/component/bing.py +85 -0
- graph/component/google.py +96 -0
- requirements.txt +1 -0
- requirements_arm.txt +1 -0
- requirements_dev.txt +1 -0
graph/component/__init__.py
CHANGED
@@ -14,6 +14,8 @@ from .duckduckgo import DuckDuckGo, DuckDuckGoParam
|
|
14 |
from .wikipedia import Wikipedia, WikipediaParam
|
15 |
from .pubmed import PubMed, PubMedParam
|
16 |
from .arxiv import ArXiv, ArXivParam
|
|
|
|
|
17 |
|
18 |
|
19 |
def component_class(class_name):
|
|
|
14 |
from .wikipedia import Wikipedia, WikipediaParam
|
15 |
from .pubmed import PubMed, PubMedParam
|
16 |
from .arxiv import ArXiv, ArXivParam
|
17 |
+
from .google import Google, GoogleParam
|
18 |
+
from .bing import Bing, BingParam
|
19 |
|
20 |
|
21 |
def component_class(class_name):
|
graph/component/bing.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
#
|
16 |
+
from abc import ABC
|
17 |
+
import requests
|
18 |
+
import pandas as pd
|
19 |
+
from graph.settings import DEBUG
|
20 |
+
from graph.component.base import ComponentBase, ComponentParamBase
|
21 |
+
|
22 |
+
|
23 |
+
class BingParam(ComponentParamBase):
    """
    Parameter set for the Bing search component.

    Attributes set in ``__init__``:
        top_n:    number of results wanted (default 10).
        channel:  "Webpages" or "News" (default "Webpages").
        api_key:  Bing subscription key; the default is only a placeholder.
        country:  upper-case country code (default "CN").
        language: language code (default "en").
    """

    def __init__(self):
        super().__init__()
        self.top_n = 10
        self.channel = "Webpages"
        self.api_key = "YOUR_ACCESS_KEY"
        self.country = "CN"
        self.language = "en"

    def check(self):
        """
        Validate every parameter with the ``check_*`` helpers.

        The helpers are not defined in this class (presumably inherited from
        ComponentParamBase -- confirm there how a failed check is reported).
        Checks run in a fixed order: top_n, channel, api_key, country, language.
        """
        allowed_channels = ["Webpages", "News"]
        allowed_countries = [
            'AR', 'AU', 'AT', 'BE', 'BR', 'CA', 'CL', 'DK', 'FI', 'FR', 'DE', 'HK', 'IN', 'ID',
            'IT', 'JP', 'KR', 'MY', 'MX', 'NL', 'NZ', 'NO', 'CN', 'PL', 'PT', 'PH', 'RU', 'SA',
            'ZA', 'ES', 'SE', 'CH', 'TW', 'TR', 'GB', 'US',
        ]
        allowed_languages = [
            'ar', 'eu', 'bn', 'bg', 'ca', 'ns', 'nt', 'hr', 'cs', 'da', 'nl', 'en', 'gb', 'et',
            'fi', 'fr', 'gl', 'de', 'gu', 'he', 'hi', 'hu', 'is', 'it', 'jp', 'kn', 'ko', 'lv',
            'lt', 'ms', 'ml', 'mr', 'nb', 'pl', 'br', 'pt', 'pa', 'ro', 'ru', 'sr', 'sk', 'sl',
            'es', 'sv', 'ta', 'te', 'th', 'tr', 'uk', 'vi',
        ]

        self.check_positive_integer(self.top_n, "Top N")
        self.check_valid_value(self.channel, "Bing Web Search or Bing News", allowed_channels)
        self.check_empty(self.api_key, "Bing subscription key")
        self.check_valid_value(self.country, "Bing Country", allowed_countries)
        self.check_valid_value(self.language, "Bing Languages", allowed_languages)
|
49 |
+
|
50 |
+
|
51 |
+
class Bing(ComponentBase, ABC):
    """
    Component that sends the upstream text to the Bing v7 REST API and
    returns the hits as a one-column ("content") DataFrame of HTML snippets.
    """
    component_name = "Bing"

    def _run(self, history, **kwargs):
        """
        Run one Bing search.

        The query is the " - "-joined "content" of ``self.get_input()``.
        Depending on ``self._param.channel`` the web-search or the
        news-search endpoint is called.

        Returns:
            * ``Bing.be_output("")`` when there is no query or no hit,
            * ``Bing.be_output("**ERROR**: ...")`` when the request or the
              parsing of a hit raises,
            * otherwise a DataFrame with one ``content`` row per hit, each
              formatted as ``<a href="url">name</a> text``.
        """
        ans = self.get_input()
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return Bing.be_output("")

        # channel -> (endpoint, key wrapping the hits, per-hit text field)
        endpoints = {
            "Webpages": ("https://api.bing.microsoft.com/v7.0/search", "webPages", "snippet"),
            "News": ("https://api.bing.microsoft.com/v7.0/news/search", "news", "description"),
        }

        # Pre-bound so an unexpected channel cannot leave the name undefined.
        bing_res = []
        try:
            headers = {"Ocp-Apim-Subscription-Key": self._param.api_key, 'Accept-Language': self._param.language}
            # "count" forwards top_n, which was validated but previously
            # never sent, so the API always fell back to its own default.
            params = {"q": ans, "textDecorations": True, "textFormat": "HTML", "cc": self._param.country,
                      "answerCount": 1, "promote": self._param.channel, "count": self._param.top_n}
            if self._param.channel in endpoints:
                url, result_key, text_key = endpoints[self._param.channel]
                # A timeout keeps an unresponsive endpoint from blocking forever.
                response = requests.get(url, headers=headers, params=params, timeout=30)
                response.raise_for_status()
                search_results = response.json()

                # Hits are normally wrapped as {result_key: {"value": [...]}}.
                # NOTE(review): the dedicated news endpoint appears to return
                # "value" at the top level -- fall back to that; confirm
                # against the API reference.
                container = search_results.get(result_key)
                if not isinstance(container, dict):
                    container = search_results
                items = container.get("value") or []

                # The text field is optional; a hit without it is still kept.
                bing_res = [{"content": '<a href="' + i["url"] + '">' + i["name"] + '</a> ' + (i.get(text_key) or "")}
                            for i in items]
        except Exception as e:
            return Bing.be_output("**ERROR**: " + str(e))

        if not bing_res:
            return Bing.be_output("")

        df = pd.DataFrame(bing_res)
        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
        return df
|
graph/component/google.py
ADDED
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#
|
2 |
+
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
|
3 |
+
#
|
4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5 |
+
# you may not use this file except in compliance with the License.
|
6 |
+
# You may obtain a copy of the License at
|
7 |
+
#
|
8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9 |
+
#
|
10 |
+
# Unless required by applicable law or agreed to in writing, software
|
11 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13 |
+
# See the License for the specific language governing permissions and
|
14 |
+
# limitations under the License.
|
15 |
+
#
|
16 |
+
from abc import ABC
|
17 |
+
from serpapi import GoogleSearch
|
18 |
+
import pandas as pd
|
19 |
+
from graph.settings import DEBUG
|
20 |
+
from graph.component.base import ComponentBase, ComponentParamBase
|
21 |
+
|
22 |
+
|
23 |
+
class GoogleParam(ComponentParamBase):
    """
    Parameter set for the Google (SerpApi) search component.

    Attributes set in ``__init__``:
        top_n:    number of results wanted (default 10).
        api_key:  SerpApi API key; the default "xxx" is only a placeholder.
        country:  lower-case country code (default "cn").
        language: language code (default "en").
    """

    def __init__(self):
        super().__init__()
        self.top_n = 10
        self.api_key = "xxx"
        self.country = "cn"
        self.language = "en"

    def check(self):
        """
        Validate every parameter with the ``check_*`` helpers.

        The helpers are not defined in this class (presumably inherited from
        ComponentParamBase -- confirm there how a failed check is reported).
        Checks run in a fixed order: top_n, api_key, country, language.
        """
        allowed_countries = [
            'af', 'al', 'dz', 'as', 'ad', 'ao', 'ai', 'aq', 'ag', 'ar', 'am', 'aw', 'au', 'at',
            'az', 'bs', 'bh', 'bd', 'bb', 'by', 'be', 'bz', 'bj', 'bm', 'bt', 'bo', 'ba', 'bw',
            'bv', 'br', 'io', 'bn', 'bg', 'bf', 'bi', 'kh', 'cm', 'ca', 'cv', 'ky', 'cf', 'td',
            'cl', 'cn', 'cx', 'cc', 'co', 'km', 'cg', 'cd', 'ck', 'cr', 'ci', 'hr', 'cu', 'cy',
            'cz', 'dk', 'dj', 'dm', 'do', 'ec', 'eg', 'sv', 'gq', 'er', 'ee', 'et', 'fk', 'fo',
            'fj', 'fi', 'fr', 'gf', 'pf', 'tf', 'ga', 'gm', 'ge', 'de', 'gh', 'gi', 'gr', 'gl',
            'gd', 'gp', 'gu', 'gt', 'gn', 'gw', 'gy', 'ht', 'hm', 'va', 'hn', 'hk', 'hu', 'is',
            'in', 'id', 'ir', 'iq', 'ie', 'il', 'it', 'jm', 'jp', 'jo', 'kz', 'ke', 'ki', 'kp',
            'kr', 'kw', 'kg', 'la', 'lv', 'lb', 'ls', 'lr', 'ly', 'li', 'lt', 'lu', 'mo', 'mk',
            'mg', 'mw', 'my', 'mv', 'ml', 'mt', 'mh', 'mq', 'mr', 'mu', 'yt', 'mx', 'fm', 'md',
            'mc', 'mn', 'ms', 'ma', 'mz', 'mm', 'na', 'nr', 'np', 'nl', 'an', 'nc', 'nz', 'ni',
            'ne', 'ng', 'nu', 'nf', 'mp', 'no', 'om', 'pk', 'pw', 'ps', 'pa', 'pg', 'py', 'pe',
            'ph', 'pn', 'pl', 'pt', 'pr', 'qa', 're', 'ro', 'ru', 'rw', 'sh', 'kn', 'lc', 'pm',
            'vc', 'ws', 'sm', 'st', 'sa', 'sn', 'rs', 'sc', 'sl', 'sg', 'sk', 'si', 'sb', 'so',
            'za', 'gs', 'es', 'lk', 'sd', 'sr', 'sj', 'sz', 'se', 'ch', 'sy', 'tw', 'tj', 'tz',
            'th', 'tl', 'tg', 'tk', 'to', 'tt', 'tn', 'tr', 'tm', 'tc', 'tv', 'ug', 'ua', 'ae',
            'uk', 'gb', 'us', 'um', 'uy', 'uz', 'vu', 've', 'vn', 'vg', 'vi', 'wf', 'eh', 'ye',
            'zm', 'zw',
        ]
        allowed_languages = [
            'af', 'ak', 'sq', 'ws', 'am', 'ar', 'hy', 'az', 'eu', 'be', 'bem', 'bn', 'bh',
            'xx-bork', 'bs', 'br', 'bg', 'bt', 'km', 'ca', 'chr', 'ny', 'zh-cn', 'zh-tw', 'co',
            'hr', 'cs', 'da', 'nl', 'xx-elmer', 'en', 'eo', 'et', 'ee', 'fo', 'tl', 'fi', 'fr',
            'fy', 'gaa', 'gl', 'ka', 'de', 'el', 'kl', 'gn', 'gu', 'xx-hacker', 'ht', 'ha', 'haw',
            'iw', 'hi', 'hu', 'is', 'ig', 'id', 'ia', 'ga', 'it', 'ja', 'jw', 'kn', 'kk', 'rw',
            'rn', 'xx-klingon', 'kg', 'ko', 'kri', 'ku', 'ckb', 'ky', 'lo', 'la', 'lv', 'ln', 'lt',
            'loz', 'lg', 'ach', 'mk', 'mg', 'ms', 'ml', 'mt', 'mv', 'mi', 'mr', 'mfe', 'mo', 'mn',
            'sr-me', 'my', 'ne', 'pcm', 'nso', 'no', 'nn', 'oc', 'or', 'om', 'ps', 'fa',
            'xx-pirate', 'pl', 'pt', 'pt-br', 'pt-pt', 'pa', 'qu', 'ro', 'rm', 'nyn', 'ru', 'gd',
            'sr', 'sh', 'st', 'tn', 'crs', 'sn', 'sd', 'si', 'sk', 'sl', 'so', 'es', 'es-419', 'su',
            'sw', 'sv', 'tg', 'ta', 'tt', 'te', 'th', 'ti', 'to', 'lua', 'tum', 'tr', 'tk', 'tw',
            'ug', 'uk', 'ur', 'uz', 'vu', 'vi', 'cy', 'wo', 'xh', 'yi', 'yo', 'zu',
        ]

        self.check_positive_integer(self.top_n, "Top N")
        self.check_empty(self.api_key, "SerpApi API key")
        self.check_valid_value(self.country, "Google Country", allowed_countries)
        self.check_valid_value(self.language, "Google languages", allowed_languages)
|
71 |
+
|
72 |
+
|
73 |
+
class Google(ComponentBase, ABC):
    """
    Component that sends the upstream text to Google through SerpApi's
    ``GoogleSearch`` client and returns the organic hits as a one-column
    ("content") DataFrame of HTML snippets.
    """
    component_name = "Google"

    def _run(self, history, **kwargs):
        """
        Run one Google search.

        The query is the " - "-joined "content" of ``self.get_input()``.

        Returns:
            * ``Google.be_output("")`` when there is no query or no organic hit,
            * ``Google.be_output("**ERROR**: Existing Unavailable Parameters!")``
              when the client raises, the response carries an "error" entry
              instead of results, or a hit lacks "link"/"title",
            * otherwise a DataFrame with one ``content`` row per hit, each
              formatted as ``<a href="link">title</a> snippet``.
        """
        ans = self.get_input()
        ans = " - ".join(ans["content"]) if "content" in ans else ""
        if not ans:
            return Google.be_output("")

        try:
            client = GoogleSearch(
                {"engine": "google", "q": ans, "api_key": self._param.api_key, "gl": self._param.country,
                 "hl": self._param.language, "num": self._param.top_n})
            results = client.get_dict()

            # A response that carries an "error" entry instead of results is
            # still reported through the error path, as it was before.
            if "organic_results" not in results and results.get("error"):
                raise ValueError(results["error"])

            # Missing "organic_results" without an error means "no hits", not
            # a failure. "snippet" is optional, so one hit without it no
            # longer discards the whole search.
            google_res = [{"content": '<a href="' + i["link"] + '">' + i["title"] + '</a> ' + (i.get("snippet") or "")}
                          for i in results.get("organic_results") or []]
        except Exception as e:
            # The user-facing message is unchanged; the real cause is only
            # surfaced in debug mode.
            if DEBUG: print("Google search failed:", e)
            return Google.be_output("**ERROR**: Existing Unavailable Parameters!")

        if not google_res:
            return Google.be_output("")

        df = pd.DataFrame(google_res)
        if DEBUG: print(df, ":::::::::::::::::::::::::::::::::")
        return df
|
requirements.txt
CHANGED
@@ -23,6 +23,7 @@ Flask==3.0.3
|
|
23 |
Flask_Cors==4.0.1
|
24 |
Flask_Login==0.6.3
|
25 |
flask_session==0.8.0
|
|
|
26 |
groq==0.9.0
|
27 |
hanziconv==0.3.2
|
28 |
html_text==0.6.2
|
|
|
23 |
Flask_Cors==4.0.1
|
24 |
Flask_Login==0.6.3
|
25 |
flask_session==0.8.0
|
26 |
+
google_search_results==2.4.2
|
27 |
groq==0.9.0
|
28 |
hanziconv==0.3.2
|
29 |
html_text==0.6.2
|
requirements_arm.txt
CHANGED
@@ -154,3 +154,4 @@ wikipedia==1.4.0
|
|
154 |
Bio==1.7.1
|
155 |
arxiv==2.1.3
|
156 |
pypdf==4.3.0
|
|
|
|
154 |
Bio==1.7.1
|
155 |
arxiv==2.1.3
|
156 |
pypdf==4.3.0
|
157 |
+
google_search_results==2.4.2
|
requirements_dev.txt
CHANGED
@@ -139,3 +139,4 @@ wikipedia==1.4.0
|
|
139 |
Bio==1.7.1
|
140 |
arxiv==2.1.3
|
141 |
pypdf==4.3.0
|
|
|
|
139 |
Bio==1.7.1
|
140 |
arxiv==2.1.3
|
141 |
pypdf==4.3.0
|
142 |
+
google_search_results==2.4.2
|