Spaces:
Runtime error
Runtime error
Update App_Function_Libraries/Chunk_Lib.py
Browse files
App_Function_Libraries/Chunk_Lib.py
CHANGED
|
@@ -48,6 +48,18 @@ def load_document(file_path):
|
|
| 48 |
text = file.read()
|
| 49 |
return re.sub('\\s+', ' ', text).strip()
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
|
| 52 |
def improved_chunking_process(text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 53 |
chunk_method = chunk_options.get('method', 'words')
|
|
|
|
| 48 |
text = file.read()
|
| 49 |
return re.sub('\\s+', ' ', text).strip()
|
| 50 |
|
| 51 |
+
# Load configuration
|
| 52 |
+
config = load_comprehensive_config()
|
| 53 |
+
# Embedding Chunking options
|
| 54 |
+
chunk_options = {
|
| 55 |
+
'method': config.get('Chunking', 'method', fallback='words'),
|
| 56 |
+
'max_size': config.getint('Chunking', 'max_size', fallback=400),
|
| 57 |
+
'overlap': config.getint('Chunking', 'overlap', fallback=200),
|
| 58 |
+
'adaptive': config.getboolean('Chunking', 'adaptive', fallback=False),
|
| 59 |
+
'multi_level': config.getboolean('Chunking', 'multi_level', fallback=False),
|
| 60 |
+
'language': config.get('Chunking', 'language', fallback='english')
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
|
| 64 |
def improved_chunking_process(text: str, chunk_options: Dict[str, Any]) -> List[Dict[str, Any]]:
|
| 65 |
chunk_method = chunk_options.get('method', 'words')
|