---
# Application configuration.
#
# NOTE(review): this file was recovered from a copy in which every line was
# wrapped in table pipes and all indentation had been lost. The nesting below
# was reconstructed from the key names — confirm it against the code that
# reads this file before relying on it.

# Top-level log locations and compute device.
log_dir: '../storage/logs'  # str
log_chunk_dir: '../storage/logs/chunks'  # str
device: 'cpu'  # str [cuda, cpu]

vectorstore:
  embedd_files: false  # bool
  data_path: '../storage/data'  # str
  url_file_path: '../storage/data/urls.txt'  # str
  expand_urls: true  # bool
  db_option: 'FAISS'  # str [FAISS, Chroma, RAGatouille, RAPTOR]
  db_path: '../vectorstores'  # str
  # str [sentence-transformers/all-MiniLM-L6-v2, text-embedding-ada-002]
  model: 'sentence-transformers/all-MiniLM-L6-v2'
  search_top_k: 3  # int
  score_threshold: 0.2  # float
  # NOTE(review): faiss_params / colbert_params are assumed to be children of
  # `vectorstore` (they correspond to db_option choices) — TODO confirm.
  faiss_params:  # Not used as of now
    index_path: '../vectorstores/faiss.index'  # str
    index_type: 'Flat'  # str [Flat, HNSW, IVF]
    index_dimension: 384  # int
    index_nlist: 100  # int
    index_nprobe: 10  # int
  colbert_params:
    index_name: 'new_idx'  # str

llm_params:
  use_history: true  # bool
  memory_window: 3  # int
  llm_loader: 'openai'  # str [local_llm, openai]
  openai_params:
    model: 'gpt-3.5-turbo-1106'  # str [gpt-3.5-turbo-1106, gpt-4]
  local_llm_params:
    model: 'tiny-llama'  # str
    # NOTE(review): assumed to belong to local_llm_params rather than to
    # llm_params directly — TODO confirm against the consumer.
    temperature: 0.7  # float

chat_logging:
  log_chat: false  # bool
  platform: 'literalai'  # str

splitter_options:
  use_splitter: true  # bool
  split_by_token: true  # bool
  remove_leftover_delimiters: true  # bool
  remove_chunks: false  # bool
  chunk_size: 300  # int
  chunk_overlap: 30  # int
  # Double-quoted, so "\n" here is a real newline character.
  chunk_separators: ["\n\n", "\n", " ", ""]  # list of strings
  front_chunks_to_remove: null  # int or None
  last_chunks_to_remove: null  # int or None
  # NOTE(review): these are single-quoted, so '\t' and '\n' are the literal
  # two-character sequences backslash + letter, not tab/newline — confirm the
  # consumer expects that (e.g. uses them as regex patterns). The last two
  # entries are identical in this copy; runs of spaces may have been collapsed
  # during extraction — verify against the original file.
  delimiters_to_remove: ['\t', '\n', ' ', ' ']  # list of strings