ALLOUNE committed on
Commit
12d5a0c
·
1 Parent(s): 256eefa

add search bar for agenda items and suggestion marks in .txt files

Browse files
Files changed (3) hide show
  1. api/docs.py +59 -11
  2. static/index.html +3 -0
  3. static/js/ui.js +27 -19
api/docs.py CHANGED
@@ -35,6 +35,59 @@ NSMAP = {
35
  'v': 'urn:schemas-microsoft-com:vml'
36
  }
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  # ================================== Converting of files to .txt ====================================
39
 
40
  KREUZBERG_CONFIG: ExtractionConfig = ExtractionConfig(
@@ -124,19 +177,14 @@ async def extract_text_contents(filename: str, ext: str, bytes: io.BytesIO) -> l
124
  if ext == ".doc":
125
  logging.debug(f"Converting {filename} .doc --> .docx")
126
  docx_bytes = await convert_file_type(bytes, filename, "doc", "docx")
127
- logging.debug(
128
- f"Extracting content for filename: {filename}, ext: {ext} with converted doc")
129
- extracted_data = await extract_bytes(docx_bytes.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
130
- final_text = extracted_data.content
131
  logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
132
  elif ext == ".docx":
133
- # Applying doc revisions to docx files (especially for pCR / draftCR files)
134
- logging.debug(f"Updating .docx revisions for {filename}.")
135
- applied_revision = apply_docx_revisions(zipfile.ZipFile(bytes))
136
- logging.debug(
137
- f"Extracting content for filename: {filename}, ext: {ext} with converted docx")
138
- extracted_data = await extract_bytes(applied_revision.read(), FORMAT_MIME_TYPES[".docx"], config=KREUZBERG_CONFIG)
139
- final_text = extracted_data.content
140
  logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
141
  elif ext == ".ppt":
142
  logging.debug(f"Converting {filename} .ppt --> .pptx")
 
35
  'v': 'urn:schemas-microsoft-com:vml'
36
  }
37
 
38
+ SUGGESTION_START = '[[SUGGESTION_START]]'
39
+ SUGGESTION_END = '[[SUGGESTION_END]]'
40
+
41
+ def extract_docx_text_with_suggestions(docx_stream: io.BytesIO) -> str:
42
+ try:
43
+ with zipfile.ZipFile(docx_stream) as z:
44
+ xml = z.read('word/document.xml')
45
+ except KeyError:
46
+ raise FileNotFoundError('word/document.xml not found in the DOCX archive.')
47
+
48
+ root = etree.fromstring(xml)
49
+ body = root.find('w:body', NSMAP)
50
+
51
+ out: list[str] = []
52
+
53
+ def walk(el, collector: list[str]):
54
+ tag = etree.QName(el).localname
55
+ if tag == 'del':
56
+ return
57
+ if tag == 'ins':
58
+ temp: list[str] = []
59
+ for child in el:
60
+ walk(child, temp)
61
+ joined = ''.join(temp)
62
+ if joined.strip():
63
+ collector.append(SUGGESTION_START)
64
+ collector.append(joined)
65
+ collector.append(SUGGESTION_END)
66
+ else:
67
+ collector.append(joined)
68
+ return
69
+ if tag == 'p':
70
+ for child in el:
71
+ walk(child, collector)
72
+ collector.append('\n')
73
+ return
74
+ if tag == 't':
75
+ collector.append(el.text or '')
76
+ return
77
+ if tag == 'tab':
78
+ collector.append('\t')
79
+ return
80
+ if tag == 'br':
81
+ collector.append('\n')
82
+ return
83
+ for child in el:
84
+ walk(child, collector)
85
+
86
+ if body is not None:
87
+ walk(body, out)
88
+ text = ''.join(out).replace('\r', '')
89
+ return text
90
+
91
  # ================================== Converting of files to .txt ====================================
92
 
93
  KREUZBERG_CONFIG: ExtractionConfig = ExtractionConfig(
 
177
  if ext == ".doc":
178
  logging.debug(f"Converting {filename} .doc --> .docx")
179
  docx_bytes = await convert_file_type(bytes, filename, "doc", "docx")
180
+ logging.debug(f"Extracting content with suggestion markers for filename: {filename}, ext: {ext} (converted)")
181
+ docx_bytes.seek(0)
182
+ final_text = extract_docx_text_with_suggestions(docx_bytes)
 
183
  logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
184
  elif ext == ".docx":
185
+ logging.debug(f"Extracting .docx with suggestion markers for {filename}.")
186
+ bytes.seek(0)
187
+ final_text = extract_docx_text_with_suggestions(bytes)
 
 
 
 
188
  logging.debug(f"Got text content for filename: {filename}, ext: {ext}")
189
  elif ext == ".ppt":
190
  logging.debug(f"Converting {filename} .ppt --> .pptx")
static/index.html CHANGED
@@ -167,6 +167,9 @@
167
  <span class="font-semibold">Tous</span>
168
  </label>
169
  </li>
 
 
 
170
  <div id="agenda-options" class="flex flex-col gap-1"></div>
171
  </ul>
172
  </div>
 
167
  <span class="font-semibold">Tous</span>
168
  </label>
169
  </li>
170
+ <li class="pb-2">
171
+ <input id="agenda-search-input" type="text" placeholder="Search agenda..." class="input input-bordered w-full" />
172
+ </li>
173
  <div id="agenda-options" class="flex flex-col gap-1"></div>
174
  </ul>
175
  </div>
static/js/ui.js CHANGED
@@ -81,10 +81,9 @@ export function populateSelect(selectId, options, defaultText = 'Sélectionner..
81
  export function populateCheckboxDropdown(optionsContainerId, options, filterType, labelId, selectionSet, onSelect) {
82
  const container = document.getElementById(optionsContainerId);
83
  container.innerHTML = '';
84
- selectionSet.clear(); // reset all
85
 
86
- // Ajoute chaque option
87
- options.forEach(option => {
88
  const safeId = `${filterType}-${encodeURIComponent(option).replace(/[%\s]/g, '_')}`;
89
  const label = document.createElement('label');
90
  label.className = "flex items-center gap-2 cursor-pointer py-1";
@@ -92,42 +91,51 @@ export function populateCheckboxDropdown(optionsContainerId, options, filterType
92
  <input type="checkbox" class="${filterType}-checkbox option-checkbox" id="${safeId}" value="${option}">
93
  <span>${option}</span>
94
  `;
95
- label.querySelector('input').addEventListener('change', function () {
96
- if (this.checked) {
97
- selectionSet.add(this.value);
98
- } else {
99
- selectionSet.delete(this.value);
100
- }
101
-
102
- // Gestion du label "Tous"
103
  updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
104
- // Gestion du "Tous" global
105
  const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
106
  if (allBox && allBox.checked) allBox.checked = false;
107
- // Si plus rien n'est coché, recoche "Tous"
108
  if (selectionSet.size === 0 && allBox) allBox.checked = true;
109
  onSelect?.();
110
  });
111
- container.appendChild(label);
112
- });
 
 
 
 
 
 
 
113
 
114
- // Réinitialise le label
115
  updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
116
 
117
- // Gestion de "Tous"
118
  const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
119
  if (allBox) {
120
  allBox.addEventListener('change', function () {
121
  if (this.checked) {
122
- // Décoche tout le reste
123
  selectionSet.clear();
124
  container.querySelectorAll('input[type="checkbox"]').forEach(cb => cb.checked = false);
125
- this.checked = true; // reste coché
126
  updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
127
  applyFilters();
128
  }
129
  });
130
  }
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
 
133
  export function updateCheckboxDropdownLabel(type, labelId, set, totalCount) {
 
81
  export function populateCheckboxDropdown(optionsContainerId, options, filterType, labelId, selectionSet, onSelect) {
82
  const container = document.getElementById(optionsContainerId);
83
  container.innerHTML = '';
84
+ selectionSet.clear();
85
 
86
+ const buildOptionLabel = (option) => {
 
87
  const safeId = `${filterType}-${encodeURIComponent(option).replace(/[%\s]/g, '_')}`;
88
  const label = document.createElement('label');
89
  label.className = "flex items-center gap-2 cursor-pointer py-1";
 
91
  <input type="checkbox" class="${filterType}-checkbox option-checkbox" id="${safeId}" value="${option}">
92
  <span>${option}</span>
93
  `;
94
+ const inputEl = label.querySelector('input');
95
+ inputEl.checked = selectionSet.has(option);
96
+ inputEl.addEventListener('change', function () {
97
+ if (this.checked) selectionSet.add(this.value); else selectionSet.delete(this.value);
 
 
 
 
98
  updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
 
99
  const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
100
  if (allBox && allBox.checked) allBox.checked = false;
 
101
  if (selectionSet.size === 0 && allBox) allBox.checked = true;
102
  onSelect?.();
103
  });
104
+ return label;
105
+ };
106
+
107
+ const renderOptions = (list) => {
108
+ container.innerHTML = '';
109
+ list.forEach(option => container.appendChild(buildOptionLabel(option)));
110
+ };
111
+
112
+ renderOptions(options);
113
 
 
114
  updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
115
 
 
116
  const allBox = document.querySelector(`.${filterType}-checkbox[value="all"]`);
117
  if (allBox) {
118
  allBox.addEventListener('change', function () {
119
  if (this.checked) {
 
120
  selectionSet.clear();
121
  container.querySelectorAll('input[type="checkbox"]').forEach(cb => cb.checked = false);
122
+ this.checked = true;
123
  updateCheckboxDropdownLabel(filterType, labelId, selectionSet, options.length);
124
  applyFilters();
125
  }
126
  });
127
  }
128
+
129
+ if (filterType === 'agenda') {
130
+ const searchInput = document.getElementById('agenda-search-input');
131
+ if (searchInput) {
132
+ searchInput.addEventListener('input', () => {
133
+ const q = searchInput.value.toLowerCase();
134
+ const filtered = options.filter(o => o.toLowerCase().includes(q));
135
+ renderOptions(filtered);
136
+ });
137
+ }
138
+ }
139
  }
140
 
141
  export function updateCheckboxDropdownLabel(type, labelId, set, totalCount) {