Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python | |
| # -*- coding: utf-8 -*- | |
| """ | |
| @author:quincy qiang | |
| @license: Apache Licence | |
| @file: search.py | |
| @time: 2023/04/17 | |
| @contact: [email protected] | |
| @software: PyCharm | |
| @description: coding.. | |
| """ | |
| import os | |
| from duckduckgo_search import ddg | |
| from duckduckgo_search.utils import SESSION | |
| from langchain.document_loaders import UnstructuredFileLoader | |
| from langchain.embeddings.huggingface import HuggingFaceEmbeddings | |
| from langchain.vectorstores import FAISS | |
class SourceService:
    """Manage a FAISS vector store built from local documents, plus web search.

    The service embeds documents with a HuggingFace embedding model, keeps the
    resulting FAISS index in ``self.vector_store`` and persists it under
    ``self.vector_store_path``. It also exposes a small DuckDuckGo search
    helper.
    """

    # Proxy that ``search_web`` routes DuckDuckGo traffic through by default.
    DEFAULT_PROXY = "socks5h://localhost:7890"

    def __init__(self, config):
        """Store the settings from *config* and create the embedding model.

        :param config: object exposing the attributes ``embedding_model_name``,
            ``docs_path`` and ``vector_store_path``.
        """
        # No index exists until init_source_vector() or load_vector_store()
        # has been called.
        self.vector_store = None
        self.config = config
        self.embeddings = HuggingFaceEmbeddings(model_name=self.config.embedding_model_name)
        self.docs_path = self.config.docs_path
        self.vector_store_path = self.config.vector_store_path

    def init_source_vector(self):
        """Initialize the local knowledge-base vector store.

        Loads every ``.txt`` file found directly inside ``self.docs_path``,
        builds a FAISS index from the loaded documents and saves it to
        ``self.vector_store_path``.

        :return: None
        """
        docs = []
        for file_name in os.listdir(self.docs_path):
            if not file_name.endswith('.txt'):
                continue
            print(file_name)
            loader = UnstructuredFileLoader(os.path.join(self.docs_path, file_name), mode="elements")
            docs.extend(loader.load())
        self.vector_store = FAISS.from_documents(docs, self.embeddings)
        self.vector_store.save_local(self.vector_store_path)

    def add_document(self, document_path):
        """Load one file, add it to the current vector store and re-save it.

        ``self.vector_store`` must already be populated (via
        ``init_source_vector`` or ``load_vector_store``); it is ``None`` on a
        freshly constructed service.

        :param document_path: path of the file to load and index.
        :return: None
        """
        loader = UnstructuredFileLoader(document_path, mode="elements")
        loaded_docs = loader.load()
        self.vector_store.add_documents(loaded_docs)
        self.vector_store.save_local(self.vector_store_path)

    def load_vector_store(self, path=None):
        """Load a saved FAISS index into ``self.vector_store`` and return it.

        :param path: directory to load from; when ``None`` (the default) the
            configured ``self.vector_store_path`` is used.
        :return: the loaded vector store.
        """
        source_path = self.vector_store_path if path is None else path
        self.vector_store = FAISS.load_local(source_path, self.embeddings)
        return self.vector_store

    def search_web(self, query, proxy=DEFAULT_PROXY):
        """Search DuckDuckGo and return the concatenated result bodies.

        :param query: search query passed to ``ddg``.
        :param proxy: proxy URL applied to both ``http`` and ``https`` on the
            shared ``duckduckgo_search`` session. Pass ``None`` (or an empty
            string) to leave the session's proxy settings untouched.
        :return: the ``body`` fields of all results joined together, or an
            empty string when the search returns nothing.
        """
        if proxy:
            # SESSION is module-level state in duckduckgo_search, so this
            # assignment affects every later search in the process.
            SESSION.proxies = {"http": proxy, "https": proxy}
        results = ddg(query)
        if not results:
            return ''
        return ''.join(result['body'] for result in results)
| # if __name__ == '__main__': | |
| # config = LangChainCFG() | |
| # source_service = SourceService(config) | |
| # source_service.init_source_vector() | |
| # search_result = source_service.vector_store.similarity_search_with_score('科比') | |
| # print(search_result) | |
| # | |
| # source_service.add_document('/home/searchgpt/yq/Knowledge-ChatGLM/docs/added/科比.txt') | |
| # search_result = source_service.vector_store.similarity_search_with_score('科比') | |
| # print(search_result) | |
| # | |
| # vector_store=source_service.load_vector_store() | |
| # search_result = source_service.vector_store.similarity_search_with_score('科比') | |
| # print(search_result) | |