XThomasBU
committed on
Commit
·
5cd7fa4
1
Parent(s):
34aaae9
Remove hardcoded PDF link from the data loader test entry point; accept links via a --links argument
Browse files
- README.md +1 -1
- code/modules/dataloader/data_loader.py +13 -3
README.md
CHANGED
|
@@ -37,7 +37,7 @@ Please visit [setup](https://dl4ds.github.io/dl4ds_tutor/guide/setup/) for more
|
|
| 37 |
3. **To test Data Loading (Optional)**
|
| 38 |
```bash
|
| 39 |
cd code
|
| 40 |
-
python -m modules.dataloader.data_loader
|
| 41 |
```
|
| 42 |
|
| 43 |
4. **Create the Vector Database**
|
|
|
|
| 37 |
3. **To test Data Loading (Optional)**
|
| 38 |
```bash
|
| 39 |
cd code
|
| 40 |
+
python -m modules.dataloader.data_loader --links "your_pdf_link"
|
| 41 |
```
|
| 42 |
|
| 43 |
4. **Create the Vector Database**
|
code/modules/dataloader/data_loader.py
CHANGED
|
@@ -417,6 +417,18 @@ class DataLoader:
|
|
| 417 |
|
| 418 |
if __name__ == "__main__":
|
| 419 |
import yaml
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
|
| 421 |
logger = logging.getLogger(__name__)
|
| 422 |
logger.setLevel(logging.INFO)
|
|
@@ -445,9 +457,7 @@ if __name__ == "__main__":
|
|
| 445 |
documents,
|
| 446 |
document_metadata,
|
| 447 |
) = data_loader.get_chunks(
|
| 448 |
-
|
| 449 |
-
"https://dl4ds.github.io/fa2024/static_files/discussion_slides/00_discussion.pdf"
|
| 450 |
-
],
|
| 451 |
[],
|
| 452 |
)
|
| 453 |
|
|
|
|
| 417 |
|
| 418 |
if __name__ == "__main__":
|
| 419 |
import yaml
|
| 420 |
+
import argparse
|
| 421 |
+
|
| 422 |
+
parser = argparse.ArgumentParser(description="Process some links.")
|
| 423 |
+
parser.add_argument(
|
| 424 |
+
'--links',
|
| 425 |
+
nargs='+',
|
| 426 |
+
required=True,
|
| 427 |
+
help="List of links to process."
|
| 428 |
+
)
|
| 429 |
+
|
| 430 |
+
args = parser.parse_args()
|
| 431 |
+
links_to_process = args.links
|
| 432 |
|
| 433 |
logger = logging.getLogger(__name__)
|
| 434 |
logger.setLevel(logging.INFO)
|
|
|
|
| 457 |
documents,
|
| 458 |
document_metadata,
|
| 459 |
) = data_loader.get_chunks(
|
| 460 |
+
links_to_process,
|
|
|
|
|
|
|
| 461 |
[],
|
| 462 |
)
|
| 463 |
|