Spaces:
				
			
			
	
			
			
		Paused
		
	
	
	
			
			
	
	
	
	
		
		
		Paused
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 | 
             
            import streamlit as st
         | 
| 2 | 
            -
            from transformers import  | 
| 3 | 
             
            from PyPDF2 import PdfReader
         | 
| 4 | 
             
            from docx import Document
         | 
| 5 | 
             
            import csv
         | 
| @@ -10,6 +10,7 @@ from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| 10 | 
             
            from langchain.embeddings import HuggingFaceEmbeddings
         | 
| 11 | 
             
            from langchain.vectorstores import FAISS
         | 
| 12 | 
             
            from huggingface_hub import login
         | 
|  | |
| 13 |  | 
| 14 | 
             
            huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
         | 
| 15 |  | 
| @@ -17,15 +18,13 @@ huggingface_token = os.getenv('HUGGINGFACE_TOKEN') | |
| 17 | 
             
            if huggingface_token:
         | 
| 18 | 
             
                login(token=huggingface_token)
         | 
| 19 |  | 
| 20 | 
            -
            #  | 
| 21 | 
             
            @st.cache_resource
         | 
| 22 | 
            -
            def  | 
| 23 | 
            -
                 | 
| 24 | 
            -
                 | 
| 25 | 
            -
                text_gen_pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
         | 
| 26 | 
            -
                return text_gen_pipeline
         | 
| 27 |  | 
| 28 | 
            -
             | 
| 29 |  | 
| 30 | 
             
            # Configuración del modelo de clasificación
         | 
| 31 | 
             
            @st.cache_resource
         | 
| @@ -75,18 +74,16 @@ def classify_text(text): | |
| 75 | 
             
                return predicted_label
         | 
| 76 |  | 
| 77 | 
             
            def translate(text, target_language):
         | 
| 78 | 
            -
                template = '''
         | 
| 79 | 
            -
                Por favor, traduzca el siguiente documento al { | 
| 80 | 
             
            <document>
         | 
| 81 | 
            -
            { | 
| 82 | 
             
            </document>
         | 
| 83 | 
             
            Asegúrese de que la traducción sea precisa y conserve el significado original del documento.
         | 
| 84 | 
             
                '''
         | 
| 85 | 
            -
                
         | 
| 86 | 
            -
                 | 
| 87 | 
            -
                 | 
| 88 | 
            -
                translated_text = response[0]['generated_text']
         | 
| 89 | 
            -
                
         | 
| 90 | 
             
                return translated_text
         | 
| 91 |  | 
| 92 | 
             
            def summarize(text, length):
         | 
| @@ -97,10 +94,9 @@ def summarize(text, length): | |
| 97 | 
             
            </document>
         | 
| 98 | 
             
            Asegúrese de que el resumen sea conciso y conserve el significado original del documento.
         | 
| 99 | 
             
                '''
         | 
| 100 | 
            -
                
         | 
| 101 | 
            -
                response =  | 
| 102 | 
            -
                summarized_text = response | 
| 103 | 
            -
                
         | 
| 104 | 
             
                return summarized_text
         | 
| 105 |  | 
| 106 | 
             
            def handle_uploaded_file(uploaded_file):
         | 
| @@ -131,21 +127,22 @@ def handle_uploaded_file(uploaded_file): | |
| 131 |  | 
| 132 | 
             
            def main():
         | 
| 133 | 
             
                st.title("LexAIcon")
         | 
| 134 | 
            -
                st.write("Puedes conversar con este chatbot basado en  | 
| 135 |  | 
| 136 | 
             
                if "messages" not in st.session_state:
         | 
| 137 | 
            -
                    st.session_state["messages"] = [ | 
| 138 |  | 
| 139 | 
             
                with st.sidebar:
         | 
| 140 | 
             
                    st.text_input("HuggingFace Token", value=huggingface_token, type="password", key="huggingface_token")
         | 
| 141 | 
             
                    st.caption("[Consigue un HuggingFace Token](https://huggingface.co/settings/tokens)")
         | 
| 142 |  | 
| 143 | 
             
                for msg in st.session_state.messages:
         | 
| 144 | 
            -
                    st. | 
| 145 |  | 
| 146 | 
            -
                 | 
| 147 | 
            -
             | 
| 148 | 
            -
             | 
|  | |
| 149 |  | 
| 150 | 
             
                    operation = st.radio("Selecciona una operación", ["Resumir", "Traducir", "Explicar"])
         | 
| 151 | 
             
                    target_language = None
         | 
| @@ -162,12 +159,12 @@ def main(): | |
| 162 | 
             
                            file_content = handle_uploaded_file(uploaded_file)
         | 
| 163 | 
             
                            classification = classify_text(file_content)
         | 
| 164 | 
             
                            vector_store = vector_stores[classification]
         | 
| 165 | 
            -
                            search_docs = vector_store.similarity_search( | 
| 166 | 
             
                            context = " ".join([doc.page_content for doc in search_docs])
         | 
| 167 | 
            -
                            prompt_with_context = f"Contexto: {context}\n\nPregunta: { | 
| 168 | 
            -
                             | 
| 169 | 
            -
                             | 
| 170 | 
            -
             | 
| 171 | 
             
                    elif operation == "Resumir":
         | 
| 172 | 
             
                        if summary_length == "corto":
         | 
| 173 | 
             
                            length = "de aproximadamente 50 palabras"
         | 
| @@ -175,17 +172,16 @@ def main(): | |
| 175 | 
             
                            length = "de aproximadamente 100 palabras"
         | 
| 176 | 
             
                        elif summary_length == "largo":
         | 
| 177 | 
             
                            length = "de aproximadamente 500 palabras"
         | 
| 178 | 
            -
                         | 
| 179 | 
            -
             | 
| 180 | 
             
                    elif operation == "Traducir":
         | 
| 181 | 
            -
                         | 
| 182 | 
            -
             | 
| 183 | 
             
                    else:
         | 
| 184 | 
            -
                         | 
| 185 | 
            -
                         | 
|  | |
| 186 |  | 
| 187 | 
            -
                    st.session_state.messages.append({"role": "assistant", "content":  | 
| 188 | 
            -
                    st. | 
| 189 |  | 
| 190 | 
             
            if __name__ == "__main__":
         | 
| 191 | 
             
                main()
         | 
|  | |
| 1 | 
             
            import streamlit as st
         | 
| 2 | 
            +
            from transformers import InferenceClient
         | 
| 3 | 
             
            from PyPDF2 import PdfReader
         | 
| 4 | 
             
            from docx import Document
         | 
| 5 | 
             
            import csv
         | 
|  | |
| 10 | 
             
            from langchain.embeddings import HuggingFaceEmbeddings
         | 
| 11 | 
             
            from langchain.vectorstores import FAISS
         | 
| 12 | 
             
            from huggingface_hub import login
         | 
| 13 | 
            +
            from transformers import AutoTokenizer, AutoModelForSequenceClassification
         | 
| 14 |  | 
| 15 | 
             
            huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
         | 
| 16 |  | 
|  | |
| 18 | 
             
            if huggingface_token:
         | 
| 19 | 
             
                login(token=huggingface_token)
         | 
| 20 |  | 
| 21 | 
            +
            # Configuración del cliente de inferencia
         | 
| 22 | 
             
            @st.cache_resource
         | 
| 23 | 
            +
            def load_inference_client():
         | 
| 24 | 
            +
                client = InferenceClient(model="mistralai/Mistral-7B-Instruct-v0.3")
         | 
| 25 | 
            +
                return client
         | 
|  | |
|  | |
| 26 |  | 
| 27 | 
            +
            client = load_inference_client()
         | 
| 28 |  | 
| 29 | 
             
            # Configuración del modelo de clasificación
         | 
| 30 | 
             
            @st.cache_resource
         | 
|  | |
| 74 | 
             
                return predicted_label
         | 
| 75 |  | 
| 76 | 
             
            def translate(text, target_language):
         | 
| 77 | 
            +
                template = f'''
         | 
| 78 | 
            +
                Por favor, traduzca el siguiente documento al {target_language}:
         | 
| 79 | 
             
            <document>
         | 
| 80 | 
            +
            {text}
         | 
| 81 | 
             
            </document>
         | 
| 82 | 
             
            Asegúrese de que la traducción sea precisa y conserve el significado original del documento.
         | 
| 83 | 
             
                '''
         | 
| 84 | 
            +
                messages = [{"role": "user", "content": template}]
         | 
| 85 | 
            +
                response = client.chat(messages)
         | 
| 86 | 
            +
                translated_text = response.generated_text
         | 
|  | |
|  | |
| 87 | 
             
                return translated_text
         | 
| 88 |  | 
| 89 | 
             
            def summarize(text, length):
         | 
|  | |
| 94 | 
             
            </document>
         | 
| 95 | 
             
            Asegúrese de que el resumen sea conciso y conserve el significado original del documento.
         | 
| 96 | 
             
                '''
         | 
| 97 | 
            +
                messages = [{"role": "user", "content": template}]
         | 
| 98 | 
            +
                response = client.chat(messages)
         | 
| 99 | 
            +
                summarized_text = response.generated_text
         | 
|  | |
| 100 | 
             
                return summarized_text
         | 
| 101 |  | 
| 102 | 
             
            def handle_uploaded_file(uploaded_file):
         | 
|  | |
| 127 |  | 
| 128 | 
             
            def main():
         | 
| 129 | 
             
                st.title("LexAIcon")
         | 
| 130 | 
            +
                st.write("Puedes conversar con este chatbot basado en Mistral-7B-Instruct y subir archivos para que el chatbot los procese.")
         | 
| 131 |  | 
| 132 | 
             
                if "messages" not in st.session_state:
         | 
| 133 | 
            +
                    st.session_state["messages"] = []
         | 
| 134 |  | 
| 135 | 
             
                with st.sidebar:
         | 
| 136 | 
             
                    st.text_input("HuggingFace Token", value=huggingface_token, type="password", key="huggingface_token")
         | 
| 137 | 
             
                    st.caption("[Consigue un HuggingFace Token](https://huggingface.co/settings/tokens)")
         | 
| 138 |  | 
| 139 | 
             
                for msg in st.session_state.messages:
         | 
| 140 | 
            +
                    st.write(f"**{msg['role'].capitalize()}:** {msg['content']}")
         | 
| 141 |  | 
| 142 | 
            +
                user_input = st.text_input("Introduce tu consulta:", "")
         | 
| 143 | 
            +
                
         | 
| 144 | 
            +
                if user_input:
         | 
| 145 | 
            +
                    st.session_state.messages.append({"role": "user", "content": user_input})
         | 
| 146 |  | 
| 147 | 
             
                    operation = st.radio("Selecciona una operación", ["Resumir", "Traducir", "Explicar"])
         | 
| 148 | 
             
                    target_language = None
         | 
|  | |
| 159 | 
             
                            file_content = handle_uploaded_file(uploaded_file)
         | 
| 160 | 
             
                            classification = classify_text(file_content)
         | 
| 161 | 
             
                            vector_store = vector_stores[classification]
         | 
| 162 | 
            +
                            search_docs = vector_store.similarity_search(user_input)
         | 
| 163 | 
             
                            context = " ".join([doc.page_content for doc in search_docs])
         | 
| 164 | 
            +
                            prompt_with_context = f"Contexto: {context}\n\nPregunta: {user_input}"
         | 
| 165 | 
            +
                            messages = [{"role": "user", "content": prompt_with_context}]
         | 
| 166 | 
            +
                            response = client.chat(messages)
         | 
| 167 | 
            +
                            bot_response = response.generated_text
         | 
| 168 | 
             
                    elif operation == "Resumir":
         | 
| 169 | 
             
                        if summary_length == "corto":
         | 
| 170 | 
             
                            length = "de aproximadamente 50 palabras"
         | 
|  | |
| 172 | 
             
                            length = "de aproximadamente 100 palabras"
         | 
| 173 | 
             
                        elif summary_length == "largo":
         | 
| 174 | 
             
                            length = "de aproximadamente 500 palabras"
         | 
| 175 | 
            +
                        bot_response = summarize(user_input, length)
         | 
|  | |
| 176 | 
             
                    elif operation == "Traducir":
         | 
| 177 | 
            +
                        bot_response = translate(user_input, target_language)
         | 
|  | |
| 178 | 
             
                    else:
         | 
| 179 | 
            +
                        messages = [{"role": "user", "content": user_input}]
         | 
| 180 | 
            +
                        response = client.chat(messages)
         | 
| 181 | 
            +
                        bot_response = response.generated_text
         | 
| 182 |  | 
| 183 | 
            +
                    st.session_state.messages.append({"role": "assistant", "content": bot_response})
         | 
| 184 | 
            +
                    st.write(f"**Assistant:** {bot_response}")
         | 
| 185 |  | 
| 186 | 
             
            if __name__ == "__main__":
         | 
| 187 | 
             
                main()
         |