Spaces:
Sleeping
Sleeping
import datetime | |
import json | |
import os | |
from logging import getLogger | |
import pandas as pd | |
import time | |
from dotenv import load_dotenv | |
from googleapiclient.discovery import build | |
# Module-level logger, named after this module.
logger = getLogger(__name__)

# Load settings from the development dotenv file before any of them are read.
load_dotenv(".env.dev")

# API key and search-engine id used for Google Custom Search requests.
api_key = os.environ.get("GOOGLE_CUSTOM_SEARCH_API_KEY")
cx = os.environ.get("SEARCH_ENGINE_ID")

# Path read from the environment; not referenced in the visible part of this file.
temp_csv_file_path = os.environ.get("TEMP_CSV_RESULTS_PATH")

# Shared client for the "customsearch" v1 API, authenticated with the key above.
gcsearch_client = build("customsearch", "v1", developerKey=api_key)
# Function to perform a search
def google_search(query):
    """Run a Google Custom Search for ``query`` and persist the top hits.

    Only the first results page is requested, and at most five items from
    it are kept. Two files are overwritten in the current working directory
    on every call:

    * ``output.json`` -- a JSON array containing only the result URLs.
    * ``./response_.json`` -- the full result records; same text as the
      returned JSON string.

    Args:
        query: Search string passed to the API as the ``q`` parameter.

    Returns:
        A ``(out, jsonstr)`` tuple. ``out`` is a list of dicts with the keys
        ``"query"``, ``"title"`` and ``"url"``; ``jsonstr`` is ``out``
        serialised as JSON with ``ensure_ascii=False``.
    """
    out = []
    for start in range(1, 2):
        # Execute the request exactly once and reuse the response; issuing
        # the same call again just to read "items" doubles API usage.
        response = (
            gcsearch_client.cse().list(q=query, cx=cx, start=start).execute()
        )
        # A response without an "items" key is treated as having no results.
        for item in response.get("items", ())[:5]:
            out.append(
                {"query": query, "title": item["title"], "url": item["link"]}
            )
        # Short pause after each request before any further API traffic.
        time.sleep(0.5)

    # URL-only list, kept as a separate artifact.
    output = [record["url"] for record in out]
    with open("output.json", "w", encoding="utf-8") as f:
        json.dump(output, f, indent=4)

    jsonstr = json.dumps(out, ensure_ascii=False)
    # The payload may contain non-ASCII text (ensure_ascii=False), so the
    # encoding is pinned to UTF-8 instead of relying on the platform default.
    with open("./response_.json", mode="w", encoding="utf-8") as response_file:
        response_file.write(jsonstr)

    logger.info("Search reuslt: %s", out)
    return out, jsonstr