File size: 3,161 Bytes
cc6362f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
<!doctype html>
<html lang="en">
{# Page template: renders the input form and, when `sentences` is set, the list of segmented sentences. #}

<head>
  {# The charset declaration goes first so it is seen before any text content is parsed. #}
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Chunky | Sentence Segmentation Service</title>
  <meta name="description" content="Chunky is a sentence segmentation service.">
  <style>
    /* Page-wide monospace font stack; no outer margin. */
    body {
      margin: 0;
      font-family: ui-monospace,
        Menlo, Monaco,
        "Cascadia Mono", "Segoe UI Mono",
        "Roboto Mono",
        "Oxygen Mono",
        "Ubuntu Mono",
        "Source Code Pro",
        "Fira Mono",
        "Droid Sans Mono",
        "Consolas", "Courier New", monospace;
    }

    /* Centered content column, capped at 73 characters wide. */
    main {
      margin: 0 auto;
      padding: 1rem;
      max-width: 73ch;
    }

    ul {
      list-style: square;
    }

    /*
     * Auto-growing textarea.
     * The wrapper is a one-cell grid holding both the textarea and a hidden
     * ::after pseudo-element. The pseudo-element prints the wrapper's
     * data-autogrow attribute, so its height follows the text and sets the
     * height of the shared cell.
     */
    div[data-autogrow]:has(textarea) {
      display: grid;
    }

    /* Hidden mirror of the text; the extra space is appended after the mirrored value. */
    div[data-autogrow]:has(textarea)::after {
      content: attr(data-autogrow) ' ';
      white-space: pre-wrap;
      visibility: hidden;
    }

    /* The grid cell drives the size, so manual resizing and scrollbars are disabled. */
    div[data-autogrow]:has(textarea)>textarea {
      resize: none;
      overflow: hidden;
    }

    /* Both layers share the same cell and identical box/text metrics so they wrap alike. */
    div[data-autogrow]:has(textarea)>textarea,
    div[data-autogrow]:has(textarea)::after {
      grid-area: 1 / 1 / 2 / 2;
      border: 1px solid currentColor;
      padding: 0.5rem;
      font: inherit;
      text-wrap: stable;
    }
  </style>
</head>

<body>
  <main>

    <h1>chunky</h1>
    <p>Sentence Segmentation Service</p>
    {# The text is posted to /split as multipart form data. #}
    <form action="/split" method="post" enctype="multipart/form-data">

      <label for="text">Text:</label><br>
      {# data-autogrow is overwritten with the textarea's value by the inline script; the stylesheet reads it to size the field. #}
      <div data-autogrow="">
        <textarea name="text" id="text" rows="1" maxlength="16000"></textarea>
      </div>

      <input type="submit" value="Split Sentences">
    </form>
    {# The copy button and the result list are only emitted when there are sentences, so no empty list is rendered. #}
    {% if sentences %}
    <button id="copy" type="button">copy</button>
    <ul>
      {% for sentence in sentences %}
      <li>{{ sentence }}</li>
      {% endfor %}
    </ul>
    {% endif %}

    <section>
      <h2>References</h2>
      <article>
        <header>
          <h3>Segment Any Text: A Universal Approach for Robust, Efficient and Adaptable Sentence Segmentation</h3>
          <p>by Markus Frohmann, Igor Sterner, Ivan Vulić, Benjamin Minixhofer, and Markus Schedl</p>
        </header>
        <p>
          <cite>
            Frohmann, M., Sterner, I., Vulić, I., Minixhofer, B., &amp; Schedl, M. (2024). Segment Any Text: A Universal
            Approach for Robust, Efficient and Adaptable Sentence Segmentation.
            <em>arXiv preprint arXiv:2406.16678</em>.
            <a href="https://doi.org/10.48550/arXiv.2406.16678">https://doi.org/10.48550/arXiv.2406.16678</a>
          </cite>
        </p>
      </article>
    </section>
  </main>

  {# Holds the rendered `sentences` value as text; the copy button's click handler reads this element's textContent and writes it to the clipboard. #}
  {# NOTE(review): the `safe` filter presumably bypasses autoescaping, so markup inside a sentence would be emitted verbatim here - confirm the input is trusted, or move the data to an escaped / JSON-encoded element. #}
  {# NOTE(review): browsers with scripting disabled display noscript content, so this raw value would appear as page text there - confirm that is intended. #}
  <noscript>
    {{sentences|safe|trim}}
  </noscript>
  <script>
    const textarea = document.querySelector("textarea");
    if (textarea) {
      textarea.addEventListener("input", (e) => {
        textarea.parentElement.dataset["autogrow"] = textarea.value;
      })
    }
    document.querySelector("button#copy").addEventListener("click", (e) => {
      navigator.clipboard.writeText(document.querySelector("noscript").textContent.trim() ?? "[]")
    })
  </script>
</body>

</html>