Spaces:
Running
Running
Update web.py
Browse files
web.py
CHANGED
|
@@ -366,13 +366,13 @@ def web_data():
|
|
| 366 |
|
| 367 |
|
| 368 |
Details(
|
| 369 |
-
Summary("Non-English
|
| 370 |
Div(
|
| 371 |
DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
| 372 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 373 |
),
|
| 374 |
style="""
|
| 375 |
-
background-color: #
|
| 376 |
padding: 15px;
|
| 377 |
border-radius: 12px;
|
| 378 |
margin-bottom: 15px
|
|
@@ -382,13 +382,13 @@ def web_data():
|
|
| 382 |
#DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
| 383 |
|
| 384 |
Details(
|
| 385 |
-
Summary("English Documents Scoring Lower than 0.65"),
|
| 386 |
Div(
|
| 387 |
DV("data/sample_en_low.json", 3, "Sample documents that are classified as English but with score less than 0.65"),
|
| 388 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 389 |
),
|
| 390 |
style="""
|
| 391 |
-
background-color: #
|
| 392 |
padding: 15px;
|
| 393 |
border-radius: 12px;
|
| 394 |
margin-bottom: 15px
|
|
@@ -483,7 +483,7 @@ def web_data():
|
|
| 483 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 484 |
),
|
| 485 |
style="""
|
| 486 |
-
background-color: #
|
| 487 |
padding: 15px;
|
| 488 |
border-radius: 12px;
|
| 489 |
margin-bottom: 15px
|
|
@@ -510,7 +510,7 @@ def web_data():
|
|
| 510 |
"""),
|
| 511 |
|
| 512 |
Details(
|
| 513 |
-
Summary("
|
| 514 |
Div (
|
| 515 |
DV(
|
| 516 |
"data/sample_terminal_punc.json",
|
|
@@ -520,7 +520,7 @@ def web_data():
|
|
| 520 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 521 |
),
|
| 522 |
style="""
|
| 523 |
-
background-color: #
|
| 524 |
padding: 15px;
|
| 525 |
border-radius: 12px;
|
| 526 |
margin-bottom: 15px
|
|
@@ -539,7 +539,7 @@ def web_data():
|
|
| 539 |
The additional keyword could be any one of “enable” / “disable” / “require” / “activate” / “browser”.
|
| 540 |
"""),
|
| 541 |
Details(
|
| 542 |
-
Summary("
|
| 543 |
Div (
|
| 544 |
DV(
|
| 545 |
"data/sample_java.jsonl",
|
|
@@ -549,7 +549,7 @@ def web_data():
|
|
| 549 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 550 |
),
|
| 551 |
style="""
|
| 552 |
-
background-color: #
|
| 553 |
padding: 15px;
|
| 554 |
border-radius: 12px;
|
| 555 |
margin-bottom: 15px
|
|
@@ -565,7 +565,7 @@ def web_data():
|
|
| 565 |
Li("the line only contains one word.", style = "margin-bottom: 5px"),
|
| 566 |
),
|
| 567 |
Details(
|
| 568 |
-
Summary("
|
| 569 |
Div (
|
| 570 |
DV(
|
| 571 |
"data/sample_refinedweb_line.json",
|
|
@@ -575,7 +575,7 @@ def web_data():
|
|
| 575 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 576 |
),
|
| 577 |
style="""
|
| 578 |
-
background-color: #
|
| 579 |
padding: 15px;
|
| 580 |
border-radius: 12px;
|
| 581 |
margin-bottom: 15px
|
|
@@ -665,7 +665,7 @@ def web_data():
|
|
| 665 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 666 |
),
|
| 667 |
style="""
|
| 668 |
-
background-color: #
|
| 669 |
padding: 15px;
|
| 670 |
border-radius: 12px;
|
| 671 |
margin-bottom: 15px
|
|
@@ -708,7 +708,7 @@ def web_data():
|
|
| 708 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 709 |
),
|
| 710 |
style="""
|
| 711 |
-
background-color: #
|
| 712 |
padding: 15px;
|
| 713 |
border-radius: 12px;
|
| 714 |
margin-bottom: 15px
|
|
@@ -762,7 +762,7 @@ def web_data():
|
|
| 762 |
""",
|
| 763 |
),
|
| 764 |
Details(
|
| 765 |
-
Summary("
|
| 766 |
Div(
|
| 767 |
DV(
|
| 768 |
"data/repeat_line_frac.jsonl",
|
|
@@ -772,7 +772,7 @@ def web_data():
|
|
| 772 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 773 |
),
|
| 774 |
style="""
|
| 775 |
-
background-color: #
|
| 776 |
padding: 15px;
|
| 777 |
border-radius: 12px;
|
| 778 |
margin-bottom: 15px
|
|
@@ -803,7 +803,7 @@ def web_data():
|
|
| 803 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 804 |
),
|
| 805 |
style="""
|
| 806 |
-
background-color: #
|
| 807 |
padding: 15px;
|
| 808 |
border-radius: 12px;
|
| 809 |
margin-bottom: 15px
|
|
@@ -850,7 +850,7 @@ def web_data():
|
|
| 850 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 851 |
),
|
| 852 |
style="""
|
| 853 |
-
background-color: #
|
| 854 |
padding: 15px;
|
| 855 |
border-radius: 12px;
|
| 856 |
margin-bottom: 15px
|
|
@@ -882,7 +882,7 @@ def web_data():
|
|
| 882 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 883 |
),
|
| 884 |
style="""
|
| 885 |
-
background-color: #
|
| 886 |
padding: 15px;
|
| 887 |
border-radius: 12px;
|
| 888 |
margin-bottom: 15px
|
|
@@ -925,7 +925,7 @@ def web_data():
|
|
| 925 |
""",
|
| 926 |
),
|
| 927 |
Details(
|
| 928 |
-
Summary("
|
| 929 |
Div(
|
| 930 |
DV(
|
| 931 |
"data/sample_top_ngram.json",
|
|
@@ -935,7 +935,7 @@ def web_data():
|
|
| 935 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 936 |
),
|
| 937 |
style="""
|
| 938 |
-
background-color: #
|
| 939 |
padding: 15px;
|
| 940 |
border-radius: 12px;
|
| 941 |
margin-bottom: 15px
|
|
@@ -969,7 +969,7 @@ def web_data():
|
|
| 969 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 970 |
),
|
| 971 |
style="""
|
| 972 |
-
background-color: #
|
| 973 |
padding: 15px;
|
| 974 |
border-radius: 12px;
|
| 975 |
margin-bottom: 15px
|
|
@@ -1031,7 +1031,7 @@ def web_data():
|
|
| 1031 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1032 |
),
|
| 1033 |
style="""
|
| 1034 |
-
background-color: #
|
| 1035 |
padding: 15px;
|
| 1036 |
border-radius: 12px;
|
| 1037 |
margin-bottom: 15px
|
|
@@ -1065,7 +1065,7 @@ def web_data():
|
|
| 1065 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1066 |
),
|
| 1067 |
style="""
|
| 1068 |
-
background-color: #
|
| 1069 |
padding: 15px;
|
| 1070 |
border-radius: 12px;
|
| 1071 |
margin-bottom: 15px
|
|
@@ -1134,7 +1134,7 @@ def web_data():
|
|
| 1134 |
""",
|
| 1135 |
),
|
| 1136 |
Details(
|
| 1137 |
-
Summary("
|
| 1138 |
P("""
|
| 1139 |
Considering n = 5 and the sample sentence:
|
| 1140 |
|
|
@@ -1157,7 +1157,7 @@ def web_data():
|
|
| 1157 |
"Sample Documents Filtered by the Fraction of Characters in Duplicated N-grams (n=5,...,10)"
|
| 1158 |
),
|
| 1159 |
Details(
|
| 1160 |
-
Summary("
|
| 1161 |
Div(
|
| 1162 |
DV(
|
| 1163 |
"data/sample_dup_ngram.json",
|
|
@@ -1167,7 +1167,7 @@ def web_data():
|
|
| 1167 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1168 |
),
|
| 1169 |
style="""
|
| 1170 |
-
background-color: #
|
| 1171 |
padding: 15px;
|
| 1172 |
border-radius: 12px;
|
| 1173 |
margin-bottom: 15px
|
|
@@ -1201,7 +1201,7 @@ def web_data():
|
|
| 1201 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1202 |
),
|
| 1203 |
style="""
|
| 1204 |
-
background-color: #
|
| 1205 |
padding: 15px;
|
| 1206 |
border-radius: 12px;
|
| 1207 |
margin-bottom: 15px
|
|
@@ -1254,7 +1254,7 @@ def web_data():
|
|
| 1254 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1255 |
),
|
| 1256 |
style="""
|
| 1257 |
-
background-color: #
|
| 1258 |
padding: 15px;
|
| 1259 |
border-radius: 12px;
|
| 1260 |
margin-bottom: 15px
|
|
@@ -1263,7 +1263,7 @@ def web_data():
|
|
| 1263 |
|
| 1264 |
|
| 1265 |
Details(
|
| 1266 |
-
Summary("
|
| 1267 |
Div(
|
| 1268 |
DV(
|
| 1269 |
"data/line_info.json",
|
|
@@ -1273,7 +1273,7 @@ def web_data():
|
|
| 1273 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1274 |
),
|
| 1275 |
style="""
|
| 1276 |
-
background-color: #
|
| 1277 |
padding: 15px;
|
| 1278 |
border-radius: 12px;
|
| 1279 |
margin-bottom: 15px
|
|
@@ -1343,7 +1343,7 @@ def web_data():
|
|
| 1343 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1344 |
),
|
| 1345 |
style="""
|
| 1346 |
-
background-color: #
|
| 1347 |
padding: 15px;
|
| 1348 |
border-radius: 12px;
|
| 1349 |
margin-bottom: 15px
|
|
@@ -1363,7 +1363,7 @@ def web_data():
|
|
| 1363 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1364 |
),
|
| 1365 |
style="""
|
| 1366 |
-
background-color: #
|
| 1367 |
padding: 15px;
|
| 1368 |
border-radius: 12px;
|
| 1369 |
margin-bottom: 15px
|
|
@@ -1414,7 +1414,7 @@ def web_data():
|
|
| 1414 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1415 |
),
|
| 1416 |
style="""
|
| 1417 |
-
background-color: #
|
| 1418 |
padding: 15px;
|
| 1419 |
border-radius: 12px;
|
| 1420 |
margin-bottom: 15px
|
|
@@ -1463,7 +1463,7 @@ def web_data():
|
|
| 1463 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1464 |
),
|
| 1465 |
style="""
|
| 1466 |
-
background-color: #
|
| 1467 |
padding: 15px;
|
| 1468 |
border-radius: 12px;
|
| 1469 |
margin-bottom: 15px
|
|
@@ -1498,7 +1498,7 @@ def web_data():
|
|
| 1498 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1499 |
),
|
| 1500 |
style="""
|
| 1501 |
-
background-color: #
|
| 1502 |
padding: 15px;
|
| 1503 |
border-radius: 12px;
|
| 1504 |
margin-bottom: 15px
|
|
@@ -1517,7 +1517,7 @@ def web_data():
|
|
| 1517 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1518 |
),
|
| 1519 |
style="""
|
| 1520 |
-
background-color: #
|
| 1521 |
padding: 15px;
|
| 1522 |
border-radius: 12px;
|
| 1523 |
margin-bottom: 15px
|
|
@@ -1555,7 +1555,7 @@ def web_data():
|
|
| 1555 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1556 |
),
|
| 1557 |
style="""
|
| 1558 |
-
background-color: #
|
| 1559 |
padding: 15px;
|
| 1560 |
border-radius: 12px;
|
| 1561 |
margin-bottom: 15px
|
|
@@ -1588,7 +1588,7 @@ def web_data():
|
|
| 1588 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1589 |
),
|
| 1590 |
style="""
|
| 1591 |
-
background-color: #
|
| 1592 |
padding: 15px;
|
| 1593 |
border-radius: 12px;
|
| 1594 |
margin-bottom: 15px
|
|
@@ -1608,7 +1608,7 @@ def web_data():
|
|
| 1608 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1609 |
),
|
| 1610 |
style="""
|
| 1611 |
-
background-color: #
|
| 1612 |
padding: 15px;
|
| 1613 |
border-radius: 12px;
|
| 1614 |
margin-bottom: 15px
|
|
@@ -1632,7 +1632,7 @@ def web_data():
|
|
| 1632 |
""", block="block", language="python"),
|
| 1633 |
H3("TxT360 Implementation"),
|
| 1634 |
Details(
|
| 1635 |
-
Summary("
|
| 1636 |
Div(
|
| 1637 |
DV(
|
| 1638 |
"data/sample_doc_stat.json",
|
|
@@ -1642,7 +1642,7 @@ def web_data():
|
|
| 1642 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1643 |
),
|
| 1644 |
style="""
|
| 1645 |
-
background-color: #
|
| 1646 |
padding: 15px;
|
| 1647 |
border-radius: 12px;
|
| 1648 |
margin-bottom: 15px
|
|
@@ -1654,13 +1654,13 @@ def web_data():
|
|
| 1654 |
"""),
|
| 1655 |
|
| 1656 |
Details(
|
| 1657 |
-
Summary("
|
| 1658 |
Div(
|
| 1659 |
DV("data/lorem_ipsum.json", 0, "Sample documents containing 'lorem ipsum'"),
|
| 1660 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1661 |
),
|
| 1662 |
style="""
|
| 1663 |
-
background-color: #
|
| 1664 |
padding: 15px;
|
| 1665 |
border-radius: 12px;
|
| 1666 |
margin-bottom: 15px
|
|
|
|
| 366 |
|
| 367 |
|
| 368 |
Details(
|
| 369 |
+
Summary("Non-English Document Examples"),
|
| 370 |
Div(
|
| 371 |
DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
| 372 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 373 |
),
|
| 374 |
style="""
|
| 375 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 376 |
padding: 15px;
|
| 377 |
border-radius: 12px;
|
| 378 |
margin-bottom: 15px
|
|
|
|
| 382 |
#DV("data/sample_non_en.json", 3, "Sample documents that are classified as non-English"),
|
| 383 |
|
| 384 |
Details(
|
| 385 |
+
Summary("English Documents Scoring Lower than 0.65 Examples"),
|
| 386 |
Div(
|
| 387 |
DV("data/sample_en_low.json", 3, "Sample documents that are classified as English but with score less than 0.65"),
|
| 388 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 389 |
),
|
| 390 |
style="""
|
| 391 |
+
background-color: #F0F8FF; /* Light green background */
|
| 392 |
padding: 15px;
|
| 393 |
border-radius: 12px;
|
| 394 |
margin-bottom: 15px
|
|
|
|
| 483 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 484 |
),
|
| 485 |
style="""
|
| 486 |
+
background-color: #F0F8FF; /* Light green background */
|
| 487 |
padding: 15px;
|
| 488 |
border-radius: 12px;
|
| 489 |
margin-bottom: 15px
|
|
|
|
| 510 |
"""),
|
| 511 |
|
| 512 |
Details(
|
| 513 |
+
Summary("Terminal Punctuation Filtering Examples"),
|
| 514 |
Div (
|
| 515 |
DV(
|
| 516 |
"data/sample_terminal_punc.json",
|
|
|
|
| 520 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 521 |
),
|
| 522 |
style="""
|
| 523 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 524 |
padding: 15px;
|
| 525 |
border-radius: 12px;
|
| 526 |
margin-bottom: 15px
|
|
|
|
| 539 |
The additional keyword could be any one of “enable” / “disable” / “require” / “activate” / “browser”.
|
| 540 |
"""),
|
| 541 |
Details(
|
| 542 |
+
Summary("Javascript Examples Filtered by C4 but Kept in TxT360"),
|
| 543 |
Div (
|
| 544 |
DV(
|
| 545 |
"data/sample_java.jsonl",
|
|
|
|
| 549 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 550 |
),
|
| 551 |
style="""
|
| 552 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 553 |
padding: 15px;
|
| 554 |
border-radius: 12px;
|
| 555 |
margin-bottom: 15px
|
|
|
|
| 565 |
Li("the line only contains one word.", style = "margin-bottom: 5px"),
|
| 566 |
),
|
| 567 |
Details(
|
| 568 |
+
Summary("Documents Filtered using RefinedWeb Rules."),
|
| 569 |
Div (
|
| 570 |
DV(
|
| 571 |
"data/sample_refinedweb_line.json",
|
|
|
|
| 575 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 576 |
),
|
| 577 |
style="""
|
| 578 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 579 |
padding: 15px;
|
| 580 |
border-radius: 12px;
|
| 581 |
margin-bottom: 15px
|
|
|
|
| 665 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 666 |
),
|
| 667 |
style="""
|
| 668 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 669 |
padding: 15px;
|
| 670 |
border-radius: 12px;
|
| 671 |
margin-bottom: 15px
|
|
|
|
| 708 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 709 |
),
|
| 710 |
style="""
|
| 711 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 712 |
padding: 15px;
|
| 713 |
border-radius: 12px;
|
| 714 |
margin-bottom: 15px
|
|
|
|
| 762 |
""",
|
| 763 |
),
|
| 764 |
Details(
|
| 765 |
+
Summary("Excessive Line and Character Repetition Filtered Examples"),
|
| 766 |
Div(
|
| 767 |
DV(
|
| 768 |
"data/repeat_line_frac.jsonl",
|
|
|
|
| 772 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 773 |
),
|
| 774 |
style="""
|
| 775 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 776 |
padding: 15px;
|
| 777 |
border-radius: 12px;
|
| 778 |
margin-bottom: 15px
|
|
|
|
| 803 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 804 |
),
|
| 805 |
style="""
|
| 806 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 807 |
padding: 15px;
|
| 808 |
border-radius: 12px;
|
| 809 |
margin-bottom: 15px
|
|
|
|
| 850 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 851 |
),
|
| 852 |
style="""
|
| 853 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 854 |
padding: 15px;
|
| 855 |
border-radius: 12px;
|
| 856 |
margin-bottom: 15px
|
|
|
|
| 882 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 883 |
),
|
| 884 |
style="""
|
| 885 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 886 |
padding: 15px;
|
| 887 |
border-radius: 12px;
|
| 888 |
margin-bottom: 15px
|
|
|
|
| 925 |
""",
|
| 926 |
),
|
| 927 |
Details(
|
| 928 |
+
Summary("Documents Filtered Using Most Common n-Grams (n=2,3,4)"),
|
| 929 |
Div(
|
| 930 |
DV(
|
| 931 |
"data/sample_top_ngram.json",
|
|
|
|
| 935 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 936 |
),
|
| 937 |
style="""
|
| 938 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 939 |
padding: 15px;
|
| 940 |
border-radius: 12px;
|
| 941 |
margin-bottom: 15px
|
|
|
|
| 969 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 970 |
),
|
| 971 |
style="""
|
| 972 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 973 |
padding: 15px;
|
| 974 |
border-radius: 12px;
|
| 975 |
margin-bottom: 15px
|
|
|
|
| 1031 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1032 |
),
|
| 1033 |
style="""
|
| 1034 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1035 |
padding: 15px;
|
| 1036 |
border-radius: 12px;
|
| 1037 |
margin-bottom: 15px
|
|
|
|
| 1065 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1066 |
),
|
| 1067 |
style="""
|
| 1068 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1069 |
padding: 15px;
|
| 1070 |
border-radius: 12px;
|
| 1071 |
margin-bottom: 15px
|
|
|
|
| 1134 |
""",
|
| 1135 |
),
|
| 1136 |
Details(
|
| 1137 |
+
Summary("Comparison of Coding Implementations"),
|
| 1138 |
P("""
|
| 1139 |
Considering n = 5 and the sample sentence:
|
| 1140 |
|
|
|
|
| 1157 |
"Sample Documents Filtered by the Fraction of Characters in Duplicated N-grams (n=5,...,10)"
|
| 1158 |
),
|
| 1159 |
Details(
|
| 1160 |
+
Summary("Documents Filtered by Duplicated n-Grams (n=5,...,10)"),
|
| 1161 |
Div(
|
| 1162 |
DV(
|
| 1163 |
"data/sample_dup_ngram.json",
|
|
|
|
| 1167 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1168 |
),
|
| 1169 |
style="""
|
| 1170 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 1171 |
padding: 15px;
|
| 1172 |
border-radius: 12px;
|
| 1173 |
margin-bottom: 15px
|
|
|
|
| 1201 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1202 |
),
|
| 1203 |
style="""
|
| 1204 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1205 |
padding: 15px;
|
| 1206 |
border-radius: 12px;
|
| 1207 |
margin-bottom: 15px
|
|
|
|
| 1254 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1255 |
),
|
| 1256 |
style="""
|
| 1257 |
+
background-color: #EAFFF1; /* Light yellow background */ #light yellow FFFAEA
|
| 1258 |
padding: 15px;
|
| 1259 |
border-radius: 12px;
|
| 1260 |
margin-bottom: 15px
|
|
|
|
| 1263 |
|
| 1264 |
|
| 1265 |
Details(
|
| 1266 |
+
Summary("Documents Filtered by Line-Wise Heuristics"),
|
| 1267 |
Div(
|
| 1268 |
DV(
|
| 1269 |
"data/line_info.json",
|
|
|
|
| 1273 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1274 |
),
|
| 1275 |
style="""
|
| 1276 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 1277 |
padding: 15px;
|
| 1278 |
border-radius: 12px;
|
| 1279 |
margin-bottom: 15px
|
|
|
|
| 1343 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1344 |
),
|
| 1345 |
style="""
|
| 1346 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1347 |
padding: 15px;
|
| 1348 |
border-radius: 12px;
|
| 1349 |
margin-bottom: 15px
|
|
|
|
| 1363 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1364 |
),
|
| 1365 |
style="""
|
| 1366 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1367 |
padding: 15px;
|
| 1368 |
border-radius: 12px;
|
| 1369 |
margin-bottom: 15px
|
|
|
|
| 1414 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1415 |
),
|
| 1416 |
style="""
|
| 1417 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1418 |
padding: 15px;
|
| 1419 |
border-radius: 12px;
|
| 1420 |
margin-bottom: 15px
|
|
|
|
| 1463 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1464 |
),
|
| 1465 |
style="""
|
| 1466 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1467 |
padding: 15px;
|
| 1468 |
border-radius: 12px;
|
| 1469 |
margin-bottom: 15px
|
|
|
|
| 1498 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1499 |
),
|
| 1500 |
style="""
|
| 1501 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1502 |
padding: 15px;
|
| 1503 |
border-radius: 12px;
|
| 1504 |
margin-bottom: 15px
|
|
|
|
| 1517 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1518 |
),
|
| 1519 |
style="""
|
| 1520 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1521 |
padding: 15px;
|
| 1522 |
border-radius: 12px;
|
| 1523 |
margin-bottom: 15px
|
|
|
|
| 1555 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1556 |
),
|
| 1557 |
style="""
|
| 1558 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1559 |
padding: 15px;
|
| 1560 |
border-radius: 12px;
|
| 1561 |
margin-bottom: 15px
|
|
|
|
| 1588 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1589 |
),
|
| 1590 |
style="""
|
| 1591 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1592 |
padding: 15px;
|
| 1593 |
border-radius: 12px;
|
| 1594 |
margin-bottom: 15px
|
|
|
|
| 1608 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1609 |
),
|
| 1610 |
style="""
|
| 1611 |
+
background-color: #EAFFF1; /* Light yellow background */
|
| 1612 |
padding: 15px;
|
| 1613 |
border-radius: 12px;
|
| 1614 |
margin-bottom: 15px
|
|
|
|
| 1632 |
""", block="block", language="python"),
|
| 1633 |
H3("TxT360 Implementation"),
|
| 1634 |
Details(
|
| 1635 |
+
Summary("Documents Filtered by Statistics-Based Heuristics"),
|
| 1636 |
Div(
|
| 1637 |
DV(
|
| 1638 |
"data/sample_doc_stat.json",
|
|
|
|
| 1642 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1643 |
),
|
| 1644 |
style="""
|
| 1645 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 1646 |
padding: 15px;
|
| 1647 |
border-radius: 12px;
|
| 1648 |
margin-bottom: 15px
|
|
|
|
| 1654 |
"""),
|
| 1655 |
|
| 1656 |
Details(
|
| 1657 |
+
Summary("Documents Containing 'lorem ipsum'"),
|
| 1658 |
Div(
|
| 1659 |
DV("data/lorem_ipsum.json", 0, "Sample documents containing 'lorem ipsum'"),
|
| 1660 |
style="background-color: white; padding: 15px; margin-top: 10px; margin-bottom: 10px; border-radius: 8px; border: none; " # Styling for the DV2 part
|
| 1661 |
),
|
| 1662 |
style="""
|
| 1663 |
+
background-color: #F0F8FF; /* Light pink background */
|
| 1664 |
padding: 15px;
|
| 1665 |
border-radius: 12px;
|
| 1666 |
margin-bottom: 15px
|