# Chat_QnA_v2/chains/qaibot_chain.py
# Uploaded by binh99 — commit a4b89be ("update cosmos db").
import requests
import json
from chains.azure_openai import CustomAzureOpenAI
from chains.decision_maker import DecisionMaker
from chains.simple_chain import SimpleChain
from bs4 import BeautifulSoup
from chains.summary import WebSummary
from langchain.utilities.google_search import GoogleSearchAPIWrapper
from config import OPENAI_API_TYPE, OPENAI_API_VERSION, OPENAI_API_KEY, OPENAI_API_BASE, DEPLOYMENT_ID, GOOGLE_API_KEY, GOOGLE_CSE_ID
class QAIBotChain:
    """Question-answering bot.

    Routes a question either to a plain LLM answer (via ``SimpleChain``) or
    to a web-search-and-summarize pipeline, depending on ``DecisionMaker``
    (or an explicit ``custom_web_search`` flag that targets the
    fptsoftware.com site search instead of Google CSE).
    """

    def __init__(self):
        # Azure OpenAI chat model; held here for the sub-chains' use.
        self.llm = CustomAzureOpenAI(
            deployment_name=DEPLOYMENT_ID,
            openai_api_type=OPENAI_API_TYPE,
            openai_api_base=OPENAI_API_BASE,
            openai_api_version=OPENAI_API_VERSION,
            openai_api_key=OPENAI_API_KEY,
            temperature=0.0,
        )
        self.decision = DecisionMaker()    # picks "LLM Model" vs. web search
        self.simple_chain = SimpleChain()  # direct LLM answer, no search
        self.summary = WebSummary()        # per-page summarizer

    def run(self, question, custom_web_search=False, num_results=4):
        """Answer *question*.

        Returns ``(answer, False)`` when the decision maker chooses the plain
        LLM path; otherwise ``(reference_results, display_append)`` where
        ``reference_results`` is a list of ``[summary, url]`` pairs and
        ``display_append`` a parallel list of HTML anchor snippets.
        """
        if custom_web_search:
            # Query the fptsoftware.com Sitecore SXA search endpoint.
            # FIX: the original wrote "\{...\}" — an invalid escape that sent
            # a literal backslash; plain braces match the "s" param format.
            params = {
                "q": question,
                "v": "{539C9DC1-663A-418D-82A4-662D34EE34BC}",
                "p": 10,
                "l": "en",
                "s": "{EACE8DB5-668F-4357-9782-405070D28D11}",
                "itemid": "{91F4101E-B1F3-4905-A832-96F703D3FBB1}",
            }
            req = requests.get(
                "https://fptsoftware.com//sxa/search/results/?",
                params=params,
            )
            res = json.loads(req.text)
            # Relative URLs from the site search are made absolute here.
            results = [
                {"link": "https://fptsoftware.com" + r["Url"]}
                for r in res["Results"][:num_results]
            ]
        else:
            decision = self.decision.predict(question=question)
            if "LLM Model" in decision:
                # Plain LLM answer; False signals "no web references".
                ai_response = self.simple_chain.predict(question=question)
                return ai_response, False
            search = GoogleSearchAPIWrapper(
                google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID
            )
            results = search.results(question, num_results=num_results)

        reference_results = []
        display_append = []
        for idx, result in enumerate(results):
            try:
                head = requests.head(result["link"])
                # Missing Content-Type header counts as "not HTML".
                if "text/html" not in head.headers.get("Content-Type", ""):
                    continue
                html_response = requests.get(result["link"])
                soup = BeautifulSoup(html_response.content, "html.parser")
                # FIX: custom-search results carry no "title" key (the
                # original read result["title"] and every custom result died
                # with KeyError); fall back to the page's own <title>.
                title = result.get("title") or soup.find_all("title")[0].get_text()
                text = soup.get_text()
                lines = (line.strip() for line in text.splitlines())
                # Break multi-headlines into a line each (canonical two-space
                # split; a single-space split would strip all word spacing —
                # TODO confirm against upstream recipe), then drop blanks.
                chunks = (
                    phrase.strip() for line in lines for phrase in line.split("  ")
                )
                text = "\n".join(chunk for chunk in chunks if chunk)
                # FIX: original called self.web_summary, which is never
                # assigned — the attribute set in __init__ is self.summary.
                summary = self.summary.predict(question=question, doc=text)
                print("Can access", result["link"])
            except Exception:
                # Best-effort: skip pages we cannot fetch/parse/summarize.
                # FIX: the original fell through after a summary failure and
                # appended an undefined or stale `summary`; we skip instead.
                print("Cannot access ", result["link"])
                continue
            reference_results.append([summary, result["link"]])
            display_append.append(
                f'<a href="{result["link"]}" target="_blank">{idx + 1}.&nbsp;{title}</a>'
            )
        return reference_results, display_append