"""QA bot chain: routes a question to the plain LLM or a web-search + summarize pipeline."""

import json

import requests
from bs4 import BeautifulSoup
from langchain.utilities.google_search import GoogleSearchAPIWrapper

from chains.azure_openai import CustomAzureOpenAI
from chains.decision_maker import DecisionMaker
from chains.simple_chain import SimpleChain
from chains.summary import WebSummary
from config import (
    DEPLOYMENT_ID,
    GOOGLE_API_KEY,
    GOOGLE_CSE_ID,
    OPENAI_API_BASE,
    OPENAI_API_KEY,
    OPENAI_API_TYPE,
    OPENAI_API_VERSION,
)


class QAIBotChain:
    """Answer questions either directly via the LLM or by searching the web,
    fetching each hit, and summarizing its text with respect to the question."""

    def __init__(self):
        # Azure-hosted LLM shared by the sub-chains; temperature 0.0 for
        # deterministic answers.
        self.llm = CustomAzureOpenAI(
            deployment_name=DEPLOYMENT_ID,
            openai_api_type=OPENAI_API_TYPE,
            openai_api_base=OPENAI_API_BASE,
            openai_api_version=OPENAI_API_VERSION,
            openai_api_key=OPENAI_API_KEY,
            temperature=0.0,
        )
        self.decision = DecisionMaker()    # routes: plain LLM vs. web search
        self.simple_chain = SimpleChain()  # direct LLM answer
        self.summary = WebSummary()        # summarizes fetched page text

    def _custom_search(self, question, num_results):
        """Query fptsoftware.com's Sitecore SXA site search.

        Returns a list of ``{"link": <absolute url>}`` dicts, at most
        *num_results* long.
        """
        params = {
            "q": question,
            # NOTE(review): the literal backslashes inside these two GUID
            # values look suspicious — the "s" value below has none. Confirm
            # against the search API before changing; preserved byte-for-byte.
            "v": "\{539C9DC1-663A-418D-82A4-662D34EE34BC\}",
            "p": 10,
            "l": "en",
            "s": "{EACE8DB5-668F-4357-9782-405070D28D11}",
            "itemid": "\{91F4101E-B1F3-4905-A832-96F703D3FBB1\}",
        }
        req = requests.get(
            "https://fptsoftware.com//sxa/search/results/?", params=params
        )
        res = json.loads(req.text)
        # Result URLs come back site-relative; prefix the host.
        return [
            {"link": "https://fptsoftware.com" + r["Url"]}
            for r in res["Results"][:num_results]
        ]

    def run(self, question, custom_web_search=False, num_results=4):
        """Answer *question*.

        Returns:
            (answer: str, False) when the decision maker routes the question
            straight to the LLM; otherwise
            (reference_results, display_append) where ``reference_results``
            is a list of ``[summary, link]`` pairs and ``display_append``
            is a list of ``"{n}. {title}"`` strings.
        """
        if custom_web_search:
            results = self._custom_search(question, num_results)
        else:
            decision = self.decision.predict(question=question)
            if "LLM Model" in decision:
                # Decision maker judged the LLM alone can answer.
                return self.simple_chain.predict(question=question), False
            search = GoogleSearchAPIWrapper(
                google_api_key=GOOGLE_API_KEY, google_cse_id=GOOGLE_CSE_ID
            )
            results = search.results(question, num_results=num_results)

        reference_results = []
        display_append = []
        for idx, result in enumerate(results):
            try:
                head = requests.head(result["link"])
                # Only fetch pages that are actually HTML. .get() avoids a
                # KeyError when the server omits Content-Type.
                if "text/html" not in head.headers.get("Content-Type", ""):
                    continue
                html_response = requests.get(result["link"])
                soup = BeautifulSoup(html_response.content, "html.parser")
                # Custom-search results carry no "title" key (fixed from
                # result["title"], which raised KeyError); fall back to the
                # page's own <title> element.
                title = result.get("title") or soup.find_all("title")[0].get_text()
                text = soup.get_text()
                lines = (line.strip() for line in text.splitlines())
                # break multi-headlines into a line each
                # NOTE(review): splitting on a single space makes every word
                # its own chunk; the usual recipe splits on "  ". Preserved
                # as written — confirm intent before changing.
                chunks = (
                    phrase.strip() for line in lines for phrase in line.split(" ")
                )
                # drop blank lines
                text = "\n".join(chunk for chunk in chunks if chunk)
                try:
                    # Fixed: was self.web_summary (attribute never set in
                    # __init__), which raised AttributeError on every result.
                    summary = self.summary.predict(question=question, doc=text)
                except Exception:
                    # Summarization failed: skip this result instead of
                    # appending an unbound `summary` (old UnboundLocalError).
                    print("Cannot access ", result["link"])
                    continue
                print("Can access", result["link"])
                reference_results.append([summary, result["link"]])
                display_append.append(f'{idx + 1}. {title}')
            except Exception:
                # Best-effort: an unreachable or malformed result is dropped.
                continue
        return reference_results, display_append