Spaces:
Running
Running
fix errors
Browse files
common.py
CHANGED
|
@@ -104,16 +104,6 @@ def dedup_pairs_bands():
|
|
| 104 |
}
|
| 105 |
).to_html(index=False, border=0)
|
| 106 |
|
| 107 |
-
# Get the HTML table
|
| 108 |
-
table_html_data = dedup_pairs_bands()
|
| 109 |
-
|
| 110 |
-
# Wrap the table in a Div for styling
|
| 111 |
-
table_div_data = Div(
|
| 112 |
-
text=table_html_data,
|
| 113 |
-
style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;"
|
| 114 |
-
)
|
| 115 |
-
|
| 116 |
-
|
| 117 |
def dup_docs_count_graph():
|
| 118 |
dup_docs_count = {
|
| 119 |
"80": 382164413,
|
|
@@ -288,7 +278,7 @@ pii_table = pd.DataFrame(
|
|
| 288 |
)
|
| 289 |
|
| 290 |
table_html_pii = pii_table.to_html(index=False, border=0)
|
| 291 |
-
table_div_pii = Div(NotStr(table_html_pii), style="
|
| 292 |
|
| 293 |
global_div = Div(
|
| 294 |
Section(
|
|
@@ -374,7 +364,7 @@ global_div = Div(
|
|
| 374 |
P(
|
| 375 |
"There is a high chance that duplicates from different bands will have the same pairs in the same horizontal partition. Performing the Bloom filter step reduces the number of pairs by nearly ninefold."
|
| 376 |
),
|
| 377 |
-
Div(NotStr(dedup_pairs_bands()), style="
|
| 378 |
P(
|
| 379 |
"The resulting unique pairs are then used to identify clusters of near-duplicates by finding connected components in a graph, where the vertices represent documents and the edges represent matches."
|
| 380 |
),
|
|
|
|
| 104 |
}
|
| 105 |
).to_html(index=False, border=0)
|
| 106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
def dup_docs_count_graph():
|
| 108 |
dup_docs_count = {
|
| 109 |
"80": 382164413,
|
|
|
|
| 278 |
)
|
| 279 |
|
| 280 |
table_html_pii = pii_table.to_html(index=False, border=0)
|
| 281 |
+
table_div_pii = Div(NotStr(table_html_pii), style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;")
|
| 282 |
|
| 283 |
global_div = Div(
|
| 284 |
Section(
|
|
|
|
| 364 |
P(
|
| 365 |
"There is a high chance that duplicates from different bands will have the same pairs in the same horizontal partition. Performing the Bloom filter step reduces the number of pairs by nearly ninefold."
|
| 366 |
),
|
| 367 |
+
Div(NotStr(dedup_pairs_bands()), style="display: flex; justify-content: center; align-items: center; width: 100%; max-width: 100%; height: auto; overflow-x: auto;"),
|
| 368 |
P(
|
| 369 |
"The resulting unique pairs are then used to identify clusters of near-duplicates by finding connected components in a graph, where the vertices represent documents and the edges represent matches."
|
| 370 |
),
|