Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -19,6 +19,7 @@ from langchain.embeddings.openai import OpenAIEmbeddings | |
| 19 | 
             
            from langchain.prompts import PromptTemplate
         | 
| 20 | 
             
            from langchain.text_splitter import RecursiveCharacterTextSplitter
         | 
| 21 | 
             
            from langchain.vectorstores import Chroma
         | 
|  | |
| 22 | 
             
            #from langchain.vectorstores import MongoDBAtlasVectorSearch
         | 
| 23 |  | 
| 24 | 
             
            #from pymongo import MongoClient
         | 
| @@ -75,6 +76,7 @@ YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE" | |
| 75 |  | 
| 76 |  | 
| 77 |  | 
|  | |
| 78 | 
             
            ################################################
         | 
| 79 | 
             
            #LLM Model mit dem gearbeitet wird
         | 
| 80 | 
             
            #openai
         | 
| @@ -168,10 +170,12 @@ def document_loading_splitting(): | |
| 168 |  | 
| 169 | 
             
            #Chroma DB die splits ablegen - vektorisiert...
         | 
| 170 | 
             
            def document_storage_chroma(splits):
         | 
| 171 | 
            -
                 | 
| 172 | 
            -
             | 
| 173 | 
            -
                                      persist_directory = PATH_WORK + CHROMA_DIR)
         | 
| 174 |  | 
|  | |
|  | |
|  | |
| 175 | 
             
            #Mongo DB die splits ablegen - vektorisiert...
         | 
| 176 | 
             
            def document_storage_mongodb(splits):
         | 
| 177 | 
             
                MongoDBAtlasVectorSearch.from_documents(documents = splits,
         | 
| @@ -186,10 +190,9 @@ def document_retrieval_chroma(llm, prompt): | |
| 186 | 
             
                #embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
         | 
| 187 | 
             
                #etwas weniger rechenaufwendig:
         | 
| 188 | 
             
                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
         | 
| 189 | 
            -
             | 
| 190 | 
            -
                 | 
| 191 | 
            -
             | 
| 192 | 
            -
                
         | 
| 193 | 
             
                return db
         | 
| 194 |  | 
| 195 | 
             
            #dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten dafür
         | 
| @@ -304,6 +307,7 @@ def invoke (prompt, history, rag_option, openai_api_key,  temperature=0.9, max_n | |
| 304 | 
             
                    #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64}) 
         | 
| 305 | 
             
                    #llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens,top_k=10,top_p=top_p,typical_p=0.95,temperature=temperature,repetition_penalty=repetition_penalty,)
         | 
| 306 |  | 
|  | |
| 307 | 
             
                    #zusätzliche Dokumenten Splits aus DB zum Prompt hinzufügen (aus VektorDB - Chroma oder Mongo DB)
         | 
| 308 | 
             
                    if (rag_option == "An"):
         | 
| 309 | 
             
                        #muss nur einmal ausgeführt werden... 
         | 
|  | |
| 19 | 
             
            from langchain.prompts import PromptTemplate
         | 
| 20 | 
             
            from langchain.text_splitter import RecursiveCharacterTextSplitter
         | 
| 21 | 
             
            from langchain.vectorstores import Chroma
         | 
| 22 | 
            +
            from chromadb.errors import InvalidDimensionException
         | 
| 23 | 
             
            #from langchain.vectorstores import MongoDBAtlasVectorSearch
         | 
| 24 |  | 
| 25 | 
             
            #from pymongo import MongoClient
         | 
|  | |
| 76 |  | 
| 77 |  | 
| 78 |  | 
| 79 | 
            +
             | 
| 80 | 
             
            ################################################
         | 
| 81 | 
             
            #LLM Model mit dem gearbeitet wird
         | 
| 82 | 
             
            #openai
         | 
|  | |
| 170 |  | 
| 171 | 
             
            #Chroma DB die splits ablegen - vektorisiert...
         | 
| 172 | 
             
            def document_storage_chroma(splits):
         | 
| 173 | 
            +
                #OpenAI embeddings
         | 
| 174 | 
            +
                #Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()),  persist_directory = PATH_WORK + CHROMA_DIR)  
         | 
|  | |
| 175 |  | 
| 176 | 
            +
                #HF embeddings
         | 
| 177 | 
            +
                Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}),  persist_directory = PATH_WORK + CHROMA_DIR)  
         | 
| 178 | 
            +
                
         | 
| 179 | 
             
            #Mongo DB die splits ablegen - vektorisiert...
         | 
| 180 | 
             
            def document_storage_mongodb(splits):
         | 
| 181 | 
             
                MongoDBAtlasVectorSearch.from_documents(documents = splits,
         | 
|  | |
| 190 | 
             
                #embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
         | 
| 191 | 
             
                #etwas weniger rechenaufwendig:
         | 
| 192 | 
             
                embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
         | 
| 193 | 
            +
             | 
| 194 | 
            +
                #ChromaDb für die oben definierten HF embeddings
         | 
| 195 | 
            +
                db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
         | 
|  | |
| 196 | 
             
                return db
         | 
| 197 |  | 
| 198 | 
             
            #dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten dafür
         | 
|  | |
| 307 | 
             
                    #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64}) 
         | 
| 308 | 
             
                    #llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens,top_k=10,top_p=top_p,typical_p=0.95,temperature=temperature,repetition_penalty=repetition_penalty,)
         | 
| 309 |  | 
| 310 | 
            +
             | 
| 311 | 
             
                    #zusätzliche Dokumenten Splits aus DB zum Prompt hinzufügen (aus VektorDB - Chroma oder Mongo DB)
         | 
| 312 | 
             
                    if (rag_option == "An"):
         | 
| 313 | 
             
                        #muss nur einmal ausgeführt werden... 
         |