---
# Retriever configuration: vector-index settings, query preprocessing, and the
# encoders used for queries and passages.
#
# NOTE(review): this file had been collapsed onto a few physical lines, which
# is not valid YAML. The nesting below was reconstructed from the key names and
# from the requirement that no mapping contains duplicate keys. Please confirm
# against the consuming schema, in particular:
#   * `lang` is placed under each `tokenizer_config`;
#   * `log_interval` / `top_k` / `batch_size` after `scann_config` are
#     top-level;
#   * `refine_factor` and `encode_fields` are top-level.
#
# NOTE(review): `???` values look like "required, not yet provided"
# placeholders (e.g. OmegaConf's missing-value marker) -- confirm with the
# loader that reads this file.

# Index backend selector; presumably picks which `*_config` section below is
# active -- verify against the consumer.
index_type: faiss

annoy_config:
  distance_function: IP
  index_train_num: 1000000
  log_interval: 10000
  batch_size: 512
  n_trees: -1
  n_jobs: -1
  search_k: -1
  on_disk_build: false

faiss_config:
  distance_function: IP
  index_train_num: 1000000
  log_interval: 10000
  batch_size: 512
  index_type: auto
  n_subquantizers: 8
  n_bits: 8
  n_list: 1000
  factory_str: null
  n_probe: 32
  device_id: []
  k_factor: 10
  polysemous_ht: 0
  efSearch: 100

scann_config:
  distance_function: IP
  index_train_num: 1000000
  log_interval: 10000
  batch_size: 512
  num_leaves: 2000
  num_leaves_to_search: 500
  num_neighbors: 10
  anisotropic_quantization_threshold: 0.2
  dimensions_per_block: 2
  threads: 0

# Retriever-level settings. These repeat key names that also appear inside the
# index sections above, so they cannot belong to `scann_config`.
log_interval: 10000
top_k: 10
batch_size: 512

query_preprocess_pipeline:
  # Empty list: no processors are enabled here; the per-processor sections
  # below only carry their settings.
  processor_type: []
  length_filter_config:
    max_tokens: null
    min_tokens: null
    max_chars: null
    min_chars: null
    max_bytes: null
    min_bytes: null
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      lang: null
  token_normalize_config:
    lang: en
    penn: true
    norm_quote_commas: true
    norm_numbers: true
    pre_replace_unicode_punct: false
    post_remove_control_chars: false
    perl_parity: false
  truncate_config:
    max_chars: null
    max_bytes: null
    max_tokens: null
    tokenizer_config:
      tokenizer_type: moses
      hf_tokenizer_path: null
      tiktok_tokenizer_name: null
      lang: null

# Machine-specific absolute path; adjust when using this file elsewhere.
database_path: /data/zhangzhuocheng/Lab/Python/LLM/datasets/RAG/toycorp/dense

query_encoder_config:
  encoder_type: hf
  cohere_config:
    model: embed-multilingual-v3.0
    input_type: search_document
    base_url: null
    api_key: ???
    proxy: null
  hf_config:
    model_path: sentence-transformers/all-MiniLM-L6-v2
    tokenizer_path: null
    trust_remote_code: false
    device_id:
      - 0
    load_dtype: auto
    max_encode_length: 512
    encode_method: mean
    normalize: false
    prompt: ''
    task: ''
  hf_clip_config:
    model_path: ???
    tokenizer_path: null
    trust_remote_code: false
    device_id: []
    load_dtype: auto
    max_encode_length: 512
    normalize: false
    convert_to_rgb: false
  jina_config:
    model: jina-embeddings-v3
    base_url: https://api.jina.ai/v1/embeddings
    # SECURITY: a literal API key used to be stored here. Credentials must not
    # live in this file; supply the key at runtime instead. The placeholder
    # matches the one already used for `cohere_config.api_key`. The key that
    # was previously written here should be treated as leaked and revoked.
    api_key: ???
    dimensions: 1024
    task: null
    proxy: null
  ollama_config:
    model_name: ???
    base_url: ???
    prompt: null
    verbose: false
    embedding_size: 768
    allow_parallel: true
  openai_config:
    is_azure: false
    model_name: ???
    base_url: null
    api_key: EMPTY
    api_version: 2024-07-01-preview
    verbose: false
    proxy: null
    dimension: null
  sentence_transformer_config:
    model_path: ???
    device_id: []
    trust_remote_code: false
    task: null
    prompt_name: null
    prompt: null
    prompt_dict: null
    normalize: false
    model_kwargs: {}

# Same layout as `query_encoder_config`; the only differing value is
# `hf_config.device_id`, which lists four devices here instead of one.
passage_encoder_config:
  encoder_type: hf
  cohere_config:
    model: embed-multilingual-v3.0
    input_type: search_document
    base_url: null
    api_key: ???
    proxy: null
  hf_config:
    model_path: sentence-transformers/all-MiniLM-L6-v2
    tokenizer_path: null
    trust_remote_code: false
    device_id:
      - 0
      - 1
      - 2
      - 3
    load_dtype: auto
    max_encode_length: 512
    encode_method: mean
    normalize: false
    prompt: ''
    task: ''
  hf_clip_config:
    model_path: ???
    tokenizer_path: null
    trust_remote_code: false
    device_id: []
    load_dtype: auto
    max_encode_length: 512
    normalize: false
    convert_to_rgb: false
  jina_config:
    model: jina-embeddings-v3
    base_url: https://api.jina.ai/v1/embeddings
    # SECURITY: a literal API key used to be stored here. Credentials must not
    # live in this file; supply the key at runtime instead. The placeholder
    # matches the one already used for `cohere_config.api_key`. The key that
    # was previously written here should be treated as leaked and revoked.
    api_key: ???
    dimensions: 1024
    task: null
    proxy: null
  ollama_config:
    model_name: ???
    base_url: ???
    prompt: null
    verbose: false
    embedding_size: 768
    allow_parallel: true
  openai_config:
    is_azure: false
    model_name: ???
    base_url: null
    api_key: EMPTY
    api_version: 2024-07-01-preview
    verbose: false
    proxy: null
    dimension: null
  sentence_transformer_config:
    model_path: ???
    device_id: []
    trust_remote_code: false
    task: null
    prompt_name: null
    prompt: null
    prompt_dict: null
    normalize: false
    model_kwargs: {}

refine_factor: 10

encode_fields:
  - text