| from langchain.embeddings import OpenAIEmbeddings | |
| from langchain.vectorstores import Chroma | |
| from langchain.llms import OpenAI | |
| from langchain.chains.question_answering import load_qa_chain | |
| import os | |
| import fitz | |
| import re | |
| import gradio as gr | |
| import time | |
def preprocess(text):
    """Return ``text`` with every newline character removed.

    The pieces on either side of a newline are joined directly, with no
    separator inserted between them.
    """
    return ''.join(text.split('\n'))
def pdf_to_text(path, start_page=1, end_page=None):
    """Extract the plain text of a range of pages from a PDF file.

    Args:
        path: Location of the PDF, passed straight to ``fitz.open``.
        start_page: 1-based number of the first page to read.
        end_page: 1-based number of the last page to read (inclusive).
            ``None`` means "through the last page of the document".

    Returns:
        A list with one string per page, in page order.
    """
    doc = fitz.open(path)
    try:
        if end_page is None:
            end_page = doc.page_count
        return [
            doc.load_page(page_index).get_text("text")
            for page_index in range(start_page - 1, end_page)
        ]
    finally:
        # Release the document even when a page fails to load or extract.
        doc.close()
def law_split(path, name):
    """Split a law's PDF into one labelled string per article.

    The PDF text is cut at every article heading ("第…条" followed by
    whitespace). Whatever precedes the first heading is discarded, newlines
    are stripped from each article body, and each body is prefixed with the
    law's name and its 1-based position among the articles found.

    Args:
        path: Location of the PDF file.
        name: Name of the law, inserted into the prefix of every article.

    Returns:
        A list of strings of the form
        ``《中华人民共和国{name}》 第{n}条 {article text}``.
    """
    full_text = ''.join(pdf_to_text(path))
    # Only numerals may sit between "第" and "条". A greedy ``.+`` there can
    # stretch from a heading to a later "条" + whitespace on the same line and
    # swallow the article text in between.
    article_heading = r'第[零〇一二三四五六七八九十百千\d]+条\s'
    # Element 0 is the text before the first heading, so it is dropped.
    articles = re.split(article_heading, full_text)[1:]
    return [
        f'《中华人民共和国{name}》 第{number}条 ' + preprocess(body)
        for number, body in enumerate(articles, start=1)
    ]
def folder_read(path):
    """Collect the labelled articles of every PDF found directly in a folder.

    Each directory entry whose extension is ``.pdf`` (case-insensitive) is
    handed to ``law_split``; the entry name minus that extension is used as
    the law's name. Entries without a ``.pdf`` extension are ignored,
    including a file that is literally named ``pdf``.

    Args:
        path: Directory to scan. Only its immediate entries are listed.

    Returns:
        A flat list of article strings from all PDFs, or an empty list when
        the folder contains none.
    """
    text_list = []
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # resulting corpus order reproducible across runs and platforms.
    for file in sorted(os.listdir(path)):
        name, extension = os.path.splitext(file)
        if extension.lower() != '.pdf':
            continue
        text_list += law_split(os.path.join(path, file), name)
    return text_list
# Corpus: every article string produced from the PDFs in the ``laws`` folder.
text_list = folder_read('laws')

# Embed the articles and index them in a Chroma store for similarity search.
embeddings = OpenAIEmbeddings()
vectordb = Chroma.from_texts(
    texts=text_list,
    embedding=embeddings,
)

# Completion model used to phrase the final answer.
llm = OpenAI(
    temperature=0.5,
    max_tokens=1024,
)

# Instructions appended to every user question in generate_answer. In short:
# answer from the supplied articles with suitable legal advice, say which
# article of which law is cited, cite at most two articles, stay concise, and
# reply only with the fixed "unrelated to youth law" sentence when the
# question does not match the search results.
prompt = '''
请根据给出的法律条文回答问题,给出适当的法律建议。回答时要说出你引用的法律条文是第几条,并说出引用的每一条是哪部法律中的。
引用的法律条文不要超过两条,回答尽量简明扼要
如果问题与搜索结果无关,就仅回答"该问题与青少年法律无关"即可。
'''
def generate_answer(question):
    """Answer a question using the most relevant indexed law articles.

    Retrieves five articles from ``vectordb`` with a max-marginal-relevance
    search, runs a question-answering chain over them with the module-level
    ``prompt`` appended to the question, and times the whole operation.

    Args:
        question: The user's question text.

    Returns:
        A 3-tuple ``(answer, sources, run_time)``: the chain's answer, the
        retrieved articles' text joined by newlines, and the elapsed time in
        seconds.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    most_relevant_texts = vectordb.max_marginal_relevance_search(question, k=5)
    print(most_relevant_texts)
    chain = load_qa_chain(llm)
    answer = chain.run(
        input_documents=most_relevant_texts,
        question=question + prompt,
    )
    run_time = time.perf_counter() - start_time
    sources = '\n'.join(doc.page_content for doc in most_relevant_texts)
    return answer, sources, run_time
def ask_api(question):
    """Validate the question, then delegate to ``generate_answer``.

    Returns the error string ``'[ERROR]: 未输入问题'`` when the question is
    empty or contains only whitespace; otherwise returns whatever
    ``generate_answer`` returns for it.
    """
    if not question.strip():
        return '[ERROR]: 未输入问题'
    return generate_answer(question)
with gr.Blocks() as demo:
    # Page header: title, purpose, and the laws named as currently covered.
    gr.Markdown(
        """
# 青少年法律科普问答
本bot旨在根据中华人民共和国的法律回答有关青少年的问题,目前囊括的法律有
《未成年人保护法》
《义务教育法》
《预防未成年人犯罪法》
《妇女儿童权益保护法》
""")
    with gr.Column():
        # Question entry and submit button.
        text_input = gr.Textbox(label='请输入与青少年法律相关的问题')
        text_button = gr.Button("提交")

        # Clickable sample questions that fill the input box.
        example_questions = [
            ["未成年遭受网络欺凌该怎么办?"],
            ['年满多少岁的儿童应当接受义务教育?'],
            ['遇到离家出走的未成年应该如何处理?'],
        ]
        gr.Examples(
            example_questions,
            [text_input],
            label='示例问题'
        )

        # One component per element of generate_answer's returned tuple.
        answer_box = gr.Textbox(label='参考回答')
        sources_box = gr.Textbox(label='相关法律原文')
        runtime_box = gr.Number(label="运行时长(s)")
        text_output = [answer_box, sources_box, runtime_box]

    # NOTE(review): the button calls generate_answer directly, so the
    # empty-input check in ask_api is never reached from this UI.
    text_button.click(generate_answer, inputs=text_input, outputs=text_output)

demo.launch()