19/10/2023
Browse files- app.py +91 -0
- requirements.txt +11 -0
app.py
ADDED
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pandas as pd
|
3 |
+
import openai
|
4 |
+
import re
|
5 |
+
import json
|
6 |
+
from langchain.chat_models import ChatOpenAI
|
7 |
+
import regex
|
8 |
+
import gradio as gr
|
9 |
+
from langchain.schema import Document
|
10 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
11 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
12 |
+
from openai.embeddings_utils import get_embedding
|
13 |
+
from openai.embeddings_utils import cosine_similarity
|
14 |
+
import gspread # See: https://docs.gspread.org/en/v5.10.0/user-guide.html
|
15 |
+
from oauth2client.service_account import ServiceAccountCredentials
|
16 |
+
from datetime import datetime
|
17 |
+
|
18 |
+
# OpenAI API key, taken from the OPENAI_API_KEY environment variable
# (the value is None when the variable is not set).
openai.api_key = os.environ.get("OPENAI_API_KEY")
|
20 |
+
|
21 |
+
# Establece las credenciales y la API
|
22 |
+
|
23 |
+
#credentials = os.getenv( "credentials" )
|
24 |
+
|
25 |
+
#credentials = json.loads( credentials )
|
26 |
+
|
27 |
+
#gc = gspread.service_account_from_dict( credentials )
|
28 |
+
|
29 |
+
#Google_URL = os.getenv( "Google_URL" )
|
30 |
+
|
31 |
+
# Load the document table from the CSV location named by the "Data"
# environment variable. The check below reports a missing variable explicitly
# instead of handing None to pandas, which would fail without mentioning the
# configuration problem.
_data_path = os.getenv("Data")
if _data_path is None:
    raise ValueError(
        'Environment variable "Data" is not set; it must hold the location '
        'of the CSV file to load.'
    )
df_Expediente = pd.read_csv(_data_path)
|
33 |
+
|
34 |
+
# Main OpenAI Function
|
35 |
+
#
|
36 |
+
# Function: Get embeddings
|
37 |
+
|
38 |
+
def get_embedding(text, model):
    """Request an embedding for ``text`` from the OpenAI Embedding API.

    Every newline character in ``text`` is replaced by a single space before
    the request is sent.

    NOTE: this definition replaces the ``get_embedding`` name imported from
    ``openai.embeddings_utils`` at the top of the module.

    Args:
        text: String to embed.
        model: Embedding model name, forwarded unchanged to the API call.

    Returns:
        The ``'embedding'`` entry of the first element of the response's
        ``'data'`` list.
    """
    single_line = " ".join(text.split("\n"))
    response = openai.Embedding.create(model=model, input=[single_line])
    first_item = response['data'][0]
    return first_item['embedding']
|
41 |
+
|
42 |
+
# Search function
|
43 |
+
|
44 |
+
def _parse_embedding(raw):
    """Decode one stored ``Embedding`` cell into a sequence of numbers."""
    if not isinstance(raw, str):
        # Already a parsed sequence (e.g. a list); nothing to decode.
        return raw
    try:
        return json.loads(raw)
    except ValueError:
        # Not strict JSON (e.g. tuple syntax). literal_eval accepts Python
        # literals only, so unlike eval() it cannot execute code that might
        # be embedded in the data file.
        import ast

        return ast.literal_eval(raw)


def buscar(busqueda, datos, n_resultados):
    """Rank the rows of ``datos`` by similarity to a query text.

    The query is embedded with the "text-embedding-ada-002" model and compared
    with each row's stored embedding using ``cosine_similarity``.

    Args:
        busqueda: Query text.
        datos: DataFrame with at least the columns 'Documento', 'Pagina',
            'Textos', 'NumPalabras' and 'Embedding'. Each 'Embedding' cell is
            either the textual form of a list of numbers or an already parsed
            sequence.
        n_resultados: Maximum number of rows to return.

    Returns:
        A new DataFrame holding the ``n_resultados`` most similar rows, sorted
        by descending 'Similitud', with the columns 'Documento', 'Pagina',
        'Textos', 'NumPalabras', 'Embedding' and 'Similitud'. ``datos`` itself
        is left unmodified, so a table shared between requests is never
        written to.
    """
    busqueda_embed = get_embedding(busqueda, "text-embedding-ada-002")

    similitud = datos['Embedding'].apply(
        lambda celda: cosine_similarity(_parse_embedding(celda), busqueda_embed)
    )

    # assign() returns a copy, keeping the score column out of the caller's
    # DataFrame.
    ordenados = datos.assign(Similitud=similitud).sort_values(
        'Similitud', ascending=False
    )

    columnas = ['Documento', 'Pagina', 'Textos', 'NumPalabras', 'Embedding', 'Similitud']
    return ordenados.iloc[:n_resultados][columnas]
|
53 |
+
|
54 |
+
#
|
55 |
+
def Chat(user_message_1):
    """Return the best-matching document extracts for a query as one string.

    Calls ``buscar`` on the module-level ``df_Expediente`` table asking for 20
    results, then formats each result row as ``Documento | Pagina | Textos``.

    Args:
        user_message_1: Query text typed by the user.

    Returns:
        The formatted rows joined by three newline characters.
    """
    df_final = buscar(user_message_1, df_Expediente, 20).reset_index(drop=True)
    campos = df_final[['Documento', 'Pagina', 'Textos']]

    # str.join only accepts strings; converting each field first keeps rows
    # with numeric values (such as page numbers parsed as integers) from
    # raising TypeError.
    filas = (
        ' | '.join(map(str, fila))
        for fila in campos.itertuples(index=False, name=None)
    )
    Textos_Concatenados = '\n\n\n'.join(filas)

    # Saving the question together with its date and time is currently disabled:
    # update_records(user_message_1)
    return Textos_Concatenados
|
66 |
+
#
|
67 |
+
#
|
68 |
+
# Gradio interface: introductory text, a query box, a button and one output
# box that shows the text returned by Chat.
with gr.Blocks() as demo:
    gr.Markdown("App basada en servicios (Embeddings) de OpenAI (Chat GPT-3.5)")
    gr.Markdown("Este es un producto de prueba desarrollado por Games Econ")
    gr.Markdown("Contacto: ")
    gr.Markdown("Este es un information retrieval system o sistema de recuperación de información.")
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, so the source indentation never leaks into the text.
    gr.Markdown(
        "Este tipo de herramientas son un proceso para obtener información relevante "
        "para una necesidad de información a partir de una colección de textos."
    )
    gr.Markdown("Las búsquedas pueden basarse en un texto completo, una frase o una serie de conceptos.")
    gr.Markdown(
        "No obstante, cuanto más detallada sea la sentencia buscada, la recuperación de "
        "información será más precisa."
    )
    gr.Markdown("En alguna medida, los motores de búsqueda web (como Google) son ejemplos de estos sistemas.")

    busqueda = gr.Textbox(label="Escribe la pregunta, tema o enunciado.")
    greet_btn = gr.Button("Preguntar")

    # Single output box for the string returned by Chat.
    output1 = gr.Textbox(label="El Top 20 de los extractos de documentos que coinciden con la consulta son:")

    # Clicking the button passes the query text to Chat and writes the
    # returned string into the output box.
    greet_btn.click(fn=Chat, inputs=[busqueda], outputs=[output1])

# Alternative kept for reference: demo.launch(share=True)
demo.launch()
|
90 |
+
#
|
91 |
+
#
|
requirements.txt
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio==3.27.0
|
2 |
+
gradio-client==0.1.3
|
3 |
+
gspread==5.10.0
|
4 |
+
langchain==0.0.163
|
5 |
+
oauth2client==4.1.3
|
6 |
+
openai==0.27.4
|
7 |
+
pandas==2.0.3
|
8 |
+
plotly==5.10.0
|
9 |
+
regex==2023.3.23
|
10 |
+
scikit-learn==1.1.3
|
11 |
+
scipy==1.9.3
|