Updates
Browse files
app.py
CHANGED
|
@@ -129,7 +129,7 @@ def perform_deduplication(
|
|
| 129 |
f"**Total documents:** {len(texts1)}\n\n"
|
| 130 |
f"**Duplicates found:** {num_duplicates}\n\n"
|
| 131 |
f"**Unique documents after deduplication:** {len(deduplicated_indices)}\n\n"
|
| 132 |
-
"-" * 50 + "\n\n"
|
| 133 |
)
|
| 134 |
|
| 135 |
if num_duplicates > 0:
|
|
@@ -167,6 +167,7 @@ def perform_deduplication(
|
|
| 167 |
f"**Total documents in {dataset2_name}/{dataset2_split}:** {len(texts2)}\n\n"
|
| 168 |
f"**Duplicates found in Dataset 2:** {num_duplicates}\n\n"
|
| 169 |
f"**Unique documents after deduplication:** {len(texts2) - num_duplicates}\n\n"
|
|
|
|
| 170 |
)
|
| 171 |
|
| 172 |
if num_duplicates > 0:
|
|
|
|
| 129 |
f"**Total documents:** {len(texts1)}\n\n"
|
| 130 |
f"**Duplicates found:** {num_duplicates}\n\n"
|
| 131 |
f"**Unique documents after deduplication:** {len(deduplicated_indices)}\n\n"
|
| 132 |
+
+ "-" * 50 + "\n\n"
|
| 133 |
)
|
| 134 |
|
| 135 |
if num_duplicates > 0:
|
|
|
|
| 167 |
f"**Total documents in {dataset2_name}/{dataset2_split}:** {len(texts2)}\n\n"
|
| 168 |
f"**Duplicates found in Dataset 2:** {num_duplicates}\n\n"
|
| 169 |
f"**Unique documents after deduplication:** {len(texts2) - num_duplicates}\n\n"
|
| 170 |
+
+ "-" * 50 + "\n\n"
|
| 171 |
)
|
| 172 |
|
| 173 |
if num_duplicates > 0:
|