vcasas committed on
Commit
88dfbed
verified
1 Parent(s): ed9f945

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -24
app.py CHANGED
@@ -1,24 +1,40 @@
1
- ===== Application Startup at 2024-12-07 13:08:38 =====
2
-
3
- Traceback (most recent call last):
4
- File "/usr/local/lib/python3.10/site-packages/tenacity/__init__.py", line 478, in __call__
5
- result = fn(*args, **kwargs)
6
- TypeError: PDFReader.load_data() missing 1 required positional argument: 'file'
7
-
8
- The above exception was the direct cause of the following exception:
9
-
10
- Traceback (most recent call last):
11
- File "/home/user/app/app.py", line 31, in <module>
12
- index = create_index_from_pdf(pdf_path)
13
- File "/home/user/app/app.py", line 17, in create_index_from_pdf
14
- documents = pdf_reader.load_data() # No abrir el archivo manualmente
15
- File "/usr/local/lib/python3.10/site-packages/tenacity/__init__.py", line 336, in wrapped_f
16
- return copy(f, *args, **kw)
17
- File "/usr/local/lib/python3.10/site-packages/tenacity/__init__.py", line 475, in __call__
18
- do = self.iter(retry_state=retry_state)
19
- File "/usr/local/lib/python3.10/site-packages/tenacity/__init__.py", line 376, in iter
20
- result = action(retry_state)
21
- File "/usr/local/lib/python3.10/site-packages/tenacity/__init__.py", line 419, in exc_check
22
- raise retry_exc from fut.exception()
23
- tenacity.RetryError: RetryError[<Future at 0x7f53588abcd0 state=finished raised TypeError>]
24
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import requests
3
+ from llama_index.core import VectorStoreIndex
4
+ from llama_index.readers.file import PDFReader
5
+ import gradio as gr
6
+
7
+ # Función para descargar el archivo PDF desde una URL
8
+ def download_pdf(url, destination):
9
+ os.makedirs(os.path.dirname(destination), exist_ok=True)
10
+ response = requests.get(url)
11
+ with open(destination, 'wb') as f:
12
+ f.write(response.content)
13
+
14
+ # Función para crear el índice a partir del PDF
15
+ def create_index_from_pdf(pdf_path):
16
+ pdf_reader = PDFReader() # Inicializar el PDFReader sin la ruta del archivo
17
+ with open(pdf_path, 'rb') as pdf_file: # Abrir el archivo en modo binario
18
+ documents = pdf_reader.load_data(file=pdf_file) # Pasar el archivo abierto
19
+
20
+ # Crear el índice
21
+ index = VectorStoreIndex.from_documents(documents)
22
+ return index
23
+
24
+ # Ruta del archivo PDF a descargar
25
+ pdf_url = 'https://www.boe.es/buscar/pdf/1995/BOE-A-1995-25444-consolidado.pdf'
26
+ pdf_path = './BOE-A-1995-25444-consolidado.pdf'
27
+
28
+ # Descargar el PDF
29
+ download_pdf(pdf_url, pdf_path)
30
+
31
+ # Crear el índice a partir del PDF
32
+ index = create_index_from_pdf(pdf_path)
33
+
34
+ # Función de búsqueda en el índice
35
+ def search_pdf(query):
36
+ response = index.query(query)
37
+ return response
38
+
39
+ # Interfaz Gradio
40
+ gr.Interface(fn=search_pdf, inputs="text", outputs="text").launch()