Spaces:
Sleeping
Sleeping
ALLOUNE
committed on
Commit
·
12d5a0c
1
Parent(s):
256eefa
add search bar for agenda items and suggestion marks in .txt files
Browse files- api/docs.py +59 -11
- static/index.html +3 -0
- static/js/ui.js +27 -19
api/docs.py
CHANGED
@@ -35,6 +35,59 @@ NSMAP = {
|
|
35 |
'v': 'urn:schemas-microsoft-com:vml'
|
36 |
}
|
37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
# ================================== Converting of files to .txt ====================================
|
39 |
|
40 |
KREUZBERG_CONFIG: ExtractionConfig = ExtractionConfig(
|
@@ -124,19 +177,14 @@ async def extract_text_contents(filename: str, ext: str, bytes: io.BytesIO) -> l
|
|
124 |
if ext == ".doc":
|
125 |
logging.debug(f"Converting {filename} .doc --> .docx")
|
126 |
docx_bytes = await convert_file_type(bytes, filename, "doc", "docx")
|
127 |
-
logging.debug(
|
128 |
-
|
129 |
-
|
130 |
-
final_text = extracted_data.content
|
131 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
132 |
elif ext == ".docx":
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
logging.debug(
|
137 |
-
f"Extracting content for filename: {filename}, ext: {ext} with converted docx")
|
138 |
-
extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
|
139 |
-
final_text = extracted_data.content
|
140 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
141 |
elif ext == ".ppt":
|
142 |
logging.debug(f"Converting {filename} .ppt --> .pptx")
|
|
|
35 |
'v': 'urn:schemas-microsoft-com:vml'
|
36 |
}
|
37 |
|
38 |
+
SUGGESTION_START = '[[SUGGESTION_START]]'
|
39 |
+
SUGGESTION_END = '[[SUGGESTION_END]]'
|
40 |
+
|
41 |
+
def extract_docx_text_with_suggestions(docx_stream: io.BytesIO) -> str:
    """Extract the text of a DOCX file, marking tracked insertions.

    Reads ``word/document.xml`` from the archive and walks the body tree:

    * elements named ``del`` (tracked deletions) are skipped entirely;
    * the text under an element named ``ins`` (tracked insertion) is wrapped
      in ``SUGGESTION_START`` / ``SUGGESTION_END`` when it is not blank;
    * ``p`` appends a newline after its content, ``t`` contributes its text,
      ``tab`` becomes ``\\t`` and ``br`` / ``cr`` become ``\\n``.

    Elements are matched on their local name only, so the namespace prefix
    is not checked.

    Args:
        docx_stream: Seekable binary stream positioned at the start of a
            DOCX (zip) archive.

    Returns:
        The extracted text with ``\\r`` characters removed. An empty string
        is returned when the document has no ``w:body`` element.

    Raises:
        FileNotFoundError: If the archive has no ``word/document.xml``.
    """
    try:
        with zipfile.ZipFile(docx_stream) as z:
            xml = z.read('word/document.xml')
    except KeyError as exc:
        raise FileNotFoundError('word/document.xml not found in the DOCX archive.') from exc

    # The document may come from an untrusted source: do not expand entities
    # and never fetch external resources while parsing.
    parser = etree.XMLParser(resolve_entities=False, no_network=True)
    root = etree.fromstring(xml, parser)
    body = root.find('w:body', NSMAP)

    out: list[str] = []

    def walk(el, collector: list[str]) -> None:
        # Comments and processing instructions have a non-string tag;
        # etree.QName() rejects them, and they carry no document text.
        if not isinstance(el.tag, str):
            return

        tag = etree.QName(el).localname
        if tag == 'del':
            # Deleted (rejected-if-accepted) content is not part of the text.
            return
        if tag == 'ins':
            # Collect the inserted text separately so the markers wrap it
            # as a single span.
            temp: list[str] = []
            for child in el:
                walk(child, temp)
            joined = ''.join(temp)
            if joined.strip():
                collector.append(SUGGESTION_START)
                collector.append(joined)
                collector.append(SUGGESTION_END)
            else:
                # Whitespace-only insertions are kept but not marked.
                collector.append(joined)
            return
        if tag == 'p':
            for child in el:
                walk(child, collector)
            collector.append('\n')
            return
        if tag == 't':
            collector.append(el.text or '')
            return
        if tag == 'tab':
            collector.append('\t')
            return
        if tag in ('br', 'cr'):
            collector.append('\n')
            return
        # Any other container (runs, tables, hyperlinks, ...): descend.
        for child in el:
            walk(child, collector)

    if body is not None:
        walk(body, out)
    text = ''.join(out).replace('\r', '')
    return text
|
90 |
+
|
91 |
# ================================== Converting of files to .txt ====================================
|
92 |
|
93 |
KREUZBERG_CONFIG: ExtractionConfig = ExtractionConfig(
|
|
|
177 |
if ext == ".doc":
|
178 |
logging.debug(f"Converting {filename} .doc --> .docx")
|
179 |
docx_bytes = await convert_file_type(bytes, filename, "doc", "docx")
|
180 |
+
logging.debug(f"Extracting content with suggestion markers for filename: {filename}, ext: {ext} (converted)")
|
181 |
+
docx_bytes.seek(0)
|
182 |
+
final_text = extract_docx_text_with_suggestions(docx_bytes)
|
|
|
183 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
184 |
elif ext == ".docx":
|
185 |
+
logging.debug(f"Extracting .docx with suggestion markers for {filename}.")
|
186 |
+
bytes.seek(0)
|
187 |
+
final_text = extract_docx_text_with_suggestions(bytes)
|
|
|
|
|
|
|
|
|
188 |
logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
|
189 |
elif ext == ".ppt":
|
190 |
logging.debug(f"Converting {filename} .ppt --> .pptx")
|
static/index.html
CHANGED
@@ -167,6 +167,9 @@
|
|
167 |
<span class="font-semibold">Tous</span>
|
168 |
</label>
|
169 |
</li>
|
|
|
|
|
|
|
170 |
<div id="agenda-options" class="flex flex-col gap-1"></div>
|
171 |
</ul>
|
172 |
</div>
|
|
|
167 |
<span class="font-semibold">Tous</span>
|
168 |
</label>
|
169 |
</li>
|
170 |
+
<li class="pb-2">
|
171 |
+
<input id="agenda-search-input" type="text" placeholder="Search agenda..." class="input input-bordered w-full" />
|
172 |
+
</li>
|
173 |
<div id="agenda-options" class="flex flex-col gap-1"></div>
|
174 |
</ul>
|
175 |
</div>
|
static/js/ui.js
CHANGED
@@ -81,10 +81,9 @@ export function populateSelect(selectId, options, defaultText = 'Sélectionner..
|
|
81 |
export function populateCheckboxDropdown(optionsContainerId, options, filterType, labelId, selectionSet, onSelect) {
|
82 |
const container = document.getElementById(optionsContainerId);
|
83 |
container.innerHTML = '';
|
84 |
-
selectionSet.clear();
|
85 |
|
86 |
-
|
87 |
-
options.forEach(option => {
|
88 |
const safeId = `${filterType}-${encodeURIComponent(option).replace(/[%\s]/g, '_')}`;
|
89 |
const label = document.createElement('label');
|
90 |
label.className = "flex items-center gap-2 cursor-pointer py-1";
|
@@ -92,42 +91,51 @@ export function populateCheckboxDropdown(optionsContainerId, options, filterType
|
|
92 |
<input type="checkbox" class="${filterType}-checkbox option-checkbox" id="${safeId}" value="${option}">
|
93 |
<span>${option}</span>
|
94 |
`;
|
95 |
-
label.querySelector('input')
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
selectionSet.delete(this.value);
|
100 |
-
}
|
101 |
-
|
102 |
-
// Gestion du label "Tous"
|
103 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
104 |
-
// Gestion du "Tous" global
|
105 |
const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
|
106 |
if (allBox && allBox.checked) allBox.checked = false;
|
107 |
-
// Si plus rien n'est coché, recoche "Tous"
|
108 |
if (selectionSet.size === 0 && allBox) allBox.checked = true;
|
109 |
onSelect?.();
|
110 |
});
|
111 |
-
|
112 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
-
// Réinitialise le label
|
115 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
116 |
|
117 |
-
// Gestion de "Tous"
|
118 |
const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
|
119 |
if (allBox) {
|
120 |
allBox.addEventListener('change', function () {
|
121 |
if (this.checked) {
|
122 |
-
// Décoche tout le reste
|
123 |
selectionSet.clear();
|
124 |
container.querySelectorAll('input[type="checkbox"]').forEach(cb => cb.checked = false);
|
125 |
-
this.checked = true;
|
126 |
updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
|
127 |
applyFilters();
|
128 |
}
|
129 |
});
|
130 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
131 |
}
|
132 |
|
133 |
export function updateCheckboxDropdownLabel(type, labelId, set, totalCount) {
|
|
|
81 |
/**
 * Fills a checkbox dropdown with one labelled checkbox per option and wires
 * up selection handling.
 *
 * The container is emptied and `selectionSet` is cleared before rendering.
 * Checking or unchecking an option updates `selectionSet`, refreshes the
 * dropdown label, keeps the "all" checkbox in sync and calls `onSelect`.
 * For the `agenda` filter, the `#agenda-search-input` box (when present)
 * narrows the rendered options by case-insensitive substring match while
 * preserving the current selection.
 *
 * @param {string} optionsContainerId - id of the element receiving the options.
 * @param {string[]} options - option values to render.
 * @param {string} filterType - prefix used for ids and checkbox class names.
 * @param {string} labelId - forwarded to `updateCheckboxDropdownLabel`.
 * @param {Set<string>} selectionSet - set mutated to reflect checked options.
 * @param {Function} [onSelect] - called after an option is toggled.
 */
export function populateCheckboxDropdown(optionsContainerId, options, filterType, labelId, selectionSet, onSelect) {
    const container = document.getElementById(optionsContainerId);
    container.innerHTML = '';
    selectionSet.clear();

    const buildOptionLabel = (option) => {
        const safeId = `${filterType}-${encodeURIComponent(option).replace(/[%\s]/g, '_')}`;
        const label = document.createElement('label');
        label.className = "flex items-center gap-2 cursor-pointer py-1";

        // Build the nodes through the DOM instead of an HTML template so an
        // option containing quotes or angle brackets cannot break the
        // attribute value or inject markup.
        const inputEl = document.createElement('input');
        inputEl.type = 'checkbox';
        inputEl.className = `${filterType}-checkbox option-checkbox`;
        inputEl.id = safeId;
        inputEl.value = option;
        // Restores the checked state when the list is re-rendered by the search box.
        inputEl.checked = selectionSet.has(option);

        const text = document.createElement('span');
        text.textContent = option;

        label.append(inputEl, text);

        inputEl.addEventListener('change', () => {
            if (inputEl.checked) {
                selectionSet.add(inputEl.value);
            } else {
                selectionSet.delete(inputEl.value);
            }
            updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
            const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
            if (allBox && allBox.checked) allBox.checked = false;
            // With nothing selected any more, fall back to "all".
            if (selectionSet.size === 0 && allBox) allBox.checked = true;
            onSelect?.();
        });
        return label;
    };

    const renderOptions = (list) => {
        container.innerHTML = '';
        list.forEach(option => container.appendChild(buildOptionLabel(option)));
    };

    renderOptions(options);

    updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);

    // NOTE(review): this listener is added on every call, so repeated
    // population stacks handlers on the "all" checkbox — confirm whether the
    // caller invokes this more than once per filter.
    const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
    if (allBox) {
        allBox.addEventListener('change', function () {
            if (this.checked) {
                selectionSet.clear();
                container.querySelectorAll('input[type="checkbox"]').forEach(cb => cb.checked = false);
                updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
                applyFilters();
            }
        });
    }

    if (filterType === 'agenda') {
        const searchInput = document.getElementById('agenda-search-input');
        if (searchInput) {
            // Assign rather than addEventListener: the search box outlives
            // each call, so stacking listeners would keep re-rendering stale
            // option lists from earlier calls.
            searchInput.oninput = () => {
                const q = searchInput.value.toLowerCase();
                const filtered = options.filter(o => o.toLowerCase().includes(q));
                renderOptions(filtered);
            };
        }
    }
}
|
140 |
|
141 |
export function updateCheckboxDropdownLabel(type, labelId, set, totalCount) {
|