DHEIVER commited on
Commit
4fa0927
·
verified ·
1 Parent(s): 9f99102

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -35
app.py CHANGED
@@ -1,43 +1,64 @@
1
  import gradio as gr
2
- from gradio_client import Client
3
- import PyPDF2
4
-
5
# Extract the full text of a PDF file.
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of the PDF at *pdf_path*.

    Uses the modern PyPDF2 API: ``PdfFileReader`` / ``numPages`` /
    ``getPage`` were deprecated in PyPDF2 1.28 and removed in 3.0 —
    ``PdfReader`` and the ``pages`` sequence replace them.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None on image-only pages; ''.join also
        # avoids the quadratic += concatenation on large documents.
        return ''.join(page.extract_text() or '' for page in reader.pages)
14
-
15
# Generate an analysis of *text* using the hosted language model.
def generate_analysis(text):
    """Send *text* to the "yuntian-deng/ChatGPT" Space and return the raw
    prediction result from its ``/predict`` endpoint."""
    client = Client("yuntian-deng/ChatGPT")
    # Collect the request parameters once, then dispatch as keyword args.
    request = {
        "inputs": text,
        "top_p": 1,
        "temperature": 1,
        "chat_counter": 0,
        "chatbot": [],
    }
    return client.predict(api_name="/predict", **request)
27
 
28
# Main handler for the Gradio interface.
def analyze_pdf(pdf_file):
    """Extract the uploaded PDF's text and return the generated analysis."""
    extracted = extract_text_from_pdf(pdf_file.name)
    return generate_analysis(extracted)
33
 
34
# Gradio interface: a single PDF upload mapped to the generated analysis text.
iface = gr.Interface(
    fn=analyze_pdf,
    inputs=gr.File(label="Upload PDF"),
    outputs=gr.Textbox(label="Parecer Gerado"),
    title="Sistema de Análise de PDF com RAG",
    description="Faça upload de um PDF para gerar um parecer."
)

# Start the web app with default settings (local server).
iface.launch()
 
1
  import gradio as gr
2
+ from langchain.document_loaders import PyPDFLoader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain.embeddings import HuggingFaceEmbeddings
5
+ from langchain.vectorstores import FAISS
6
+ from langchain.chains import RetrievalQA
7
+ from langchain.llms import HuggingFaceHub
8
+
9
# Configuration
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"  # embedding model used to index PDF chunks in FAISS
LLM_REPO_ID = "google/flan-t5-large"  # Hugging Face Hub language model that answers the queries
12
+
13
# Load a PDF and build a searchable vector index over its content.
def load_and_process_pdf(pdf_path):
    """Load the PDF at *pdf_path*, split it into overlapping chunks, and
    return a FAISS vector store of their embeddings.

    Returns a ``FAISS`` instance ready to be used as a retriever.
    """
    # Load the PDF
    loader = PyPDFLoader(pdf_path)
    documents = loader.load()

    # Split the text into chunks; the 200-char overlap keeps context that
    # straddles a chunk boundary retrievable.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    texts = text_splitter.split_documents(documents)

    # Reuse one embeddings model across calls: loading the
    # sentence-transformers weights is expensive and the model is stateless,
    # so rebuilding it per uploaded PDF only adds latency.
    embeddings = getattr(load_and_process_pdf, "_embeddings", None)
    if embeddings is None:
        embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
        load_and_process_pdf._embeddings = embeddings

    # Embed the chunks and store them in the vector store.
    db = FAISS.from_documents(texts, embeddings)

    return db
28
+
29
# Answer *query* from the content of *pdf_file* using a RAG pipeline.
def generate_response(pdf_file, query):
    """Gradio handler: index the uploaded PDF and answer the user's question
    with a RetrievalQA chain.

    Returns the model's answer as a string, or a user-facing error message
    when no file was uploaded, the question is empty, or processing fails —
    matching the message style this handler already uses for missing files.
    """
    if pdf_file is None:
        return "Erro: Nenhum arquivo PDF foi carregado."
    if not query or not query.strip():
        return "Erro: Nenhuma pergunta foi informada."

    try:
        # Load and index the PDF. NOTE: the index is rebuilt for every
        # question; acceptable for a demo, cache per-file if latency matters.
        db = load_and_process_pdf(pdf_file.name)

        # Configure the hosted language model.
        llm = HuggingFaceHub(repo_id=LLM_REPO_ID, model_kwargs={"temperature": 0.7, "max_length": 512})

        # Build the RAG chain: stuff the top-3 retrieved chunks into the prompt.
        qa_chain = RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )

        # Run the query and return only the generated answer text.
        result = qa_chain({"query": query})
        return result["result"]
    except Exception as exc:
        # Surface failures (unreadable PDF, Hub/API errors) to the UI as a
        # message instead of an unhandled stack trace in the Gradio app.
        return f"Erro ao processar o PDF: {exc}"
 
 
51
 
52
# Gradio interface: PDF upload + free-text question -> RAG-generated answer.
# NOTE(review): gr.File(type="file") together with the .name access in
# generate_response assumes Gradio 3.x; Gradio 4 removed type="file"
# (use type="filepath" instead) — confirm the pinned gradio version.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.File(label="Upload PDF", type="file"),
        gr.Textbox(label="Sua Pergunta")
    ],
    outputs=gr.Textbox(label="Resposta Gerada"),
    title="Sistema de RAG com LangChain",
    description="Faça upload de um PDF e faça perguntas sobre o conteúdo."
)

# share=True requests a public tunnel link; presumably this runs on
# Hugging Face Spaces, where the flag is ignored (the app is already public).
iface.launch(share=True)