viklofg committed on
Commit
ae4b490
·
1 Parent(s): 4ddc7f2

Add example images to image uploader

Browse files

- Added a couple of different examples (layout, time period, type of document)
- Each example is connected to a preset pipeline

.gradio_cache/examples/30002027_00008.jpg ADDED

Git LFS Details

  • SHA256: 66382cca3ec6139b3debd4249b7257d7d8c2d8115909cf2d7a9d68b971c6a0a0
  • Pointer size: 132 Bytes
  • Size of remote file: 1.94 MB
.gradio_cache/examples/451511_1512_01.jpg ADDED

Git LFS Details

  • SHA256: 1c9b4d28084399f2815fe7d5b75d2d53c29deab7eead91c32f649d6fa4b0697f
  • Pointer size: 131 Bytes
  • Size of remote file: 383 kB
.gradio_cache/examples/A0062408_00006.jpg ADDED

Git LFS Details

  • SHA256: 27188c087d6af7040ad6d881ece05cfb3eb3b8760940cfd97610e703b82ff329
  • Pointer size: 131 Bytes
  • Size of remote file: 337 kB
.gradio_cache/examples/A0070302_00201.jpg ADDED

Git LFS Details

  • SHA256: f7c010858666023ba94a6159b3ed6dd233ee61698130fdfe0f4495a00c260c58
  • Pointer size: 132 Bytes
  • Size of remote file: 1.18 MB
.gradio_cache/examples/A0073477_00025.jpg ADDED

Git LFS Details

  • SHA256: 4aa2ae50394f345de3d9907eb99b55cdc8fe6eaffdd22e7203a5b98f3817e63a
  • Pointer size: 131 Bytes
  • Size of remote file: 770 kB
.gradio_cache/examples/C0000546_00085_crop.png ADDED

Git LFS Details

  • SHA256: efbf511854f49de1789cd6026ebd9d204360190cd23f51badaf76c5d54912236
  • Pointer size: 131 Bytes
  • Size of remote file: 357 kB
.gradio_cache/examples/R0003364_00005.jpg ADDED

Git LFS Details

  • SHA256: ff74ce82625e473b4cf070b0bf03d2c60dfa6640f38b36c80a94003ed8aff21e
  • Pointer size: 131 Bytes
  • Size of remote file: 671 kB
Dockerfile CHANGED
@@ -8,6 +8,7 @@ ENV PYTHONUNBUFFERED=1 \
8
  GRADIO_NUM_PORTS=1 \
9
  GRADIO_SERVER_NAME=0.0.0.0 \
10
  GRADIO_THEME=huggingface \
 
11
  SYSTEM=spaces \
12
  AM_I_IN_A_DOCKER_CONTAINER=Yes \
13
  PYTHONPATH=/home/appuser/app \
 
8
  GRADIO_NUM_PORTS=1 \
9
  GRADIO_SERVER_NAME=0.0.0.0 \
10
  GRADIO_THEME=huggingface \
11
+ GRADIO_CACHE_DIR=/home/appuser/.gradio_cache \
12
  SYSTEM=spaces \
13
  AM_I_IN_A_DOCKER_CONTAINER=Yes \
14
  PYTHONPATH=/home/appuser/app \
app/tabs/submit.py CHANGED
@@ -5,26 +5,51 @@ import gradio as gr
5
  from htrflow.pipeline.pipeline import Pipeline
6
  from htrflow.pipeline.steps import init_step
7
  import os
 
8
  from htrflow.volume.volume import Collection
9
 
10
  from htrflow.pipeline.steps import auto_import
11
  import yaml
12
 
13
- MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5)) # env: Maximum allowed images
14
- PIPELINE_DOCUMENTATION = (
15
- "https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
16
- )
 
 
17
  PIPELINES = {
18
  "Running text (Swedish)": {
19
  "file": "app/assets/templates/2_nested.yaml",
20
  "description": "This pipeline works well on documents with multiple text regions.",
 
 
 
 
 
21
  },
22
- "Letters (Swedish)": {
23
  "file": "app/assets/templates/1_simple.yaml",
24
  "description": "This pipeline works well on letters and other documents with only one text region.",
 
 
 
 
 
 
25
  },
26
  }
27
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  class PipelineWithProgress(Pipeline):
30
  @classmethod
@@ -170,24 +195,74 @@ def tracking_exported_files(tmp_output_paths):
170
  return sorted(exported_files)
171
 
172
 
173
- def get_description(pipeline: str):
 
 
 
174
  return PIPELINES[pipeline]["description"]
175
 
176
 
177
- def get_yaml(pipeline: str):
 
 
 
 
 
 
178
  with open(PIPELINES[pipeline]["file"], "r") as f:
179
  pipeline = f.read()
180
  return pipeline
181
 
182
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
183
  with gr.Blocks() as submit:
184
  collection_submit_state = gr.State()
185
- batch_image_gallery = gr.Gallery(
186
- file_types=["image"],
187
- label="Upload the images you want to transcribe",
188
- interactive=True,
189
- object_fit="cover",
190
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
 
192
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
193
  gr.HTML("Pipeline", elem_classes="pipeline-header", padding=False)
@@ -197,7 +272,7 @@ with gr.Blocks() as submit:
197
  PIPELINES, container=False, min_width=240, scale=0, elem_classes="pipeline-dropdown"
198
  )
199
  pipeline_description = gr.HTML(
200
- value=get_description, inputs=pipeline_dropdown, elem_classes="pipeline-description", padding=False
201
  )
202
 
203
  with gr.Group():
@@ -205,8 +280,9 @@ with gr.Blocks() as submit:
205
  custom_template_yaml = gr.Code(
206
  value=get_yaml, inputs=pipeline_dropdown, language="yaml", container=False
207
  )
 
208
  gr.HTML(
209
- f'See the <a href="{PIPELINE_DOCUMENTATION}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
210
  padding=False,
211
  elem_classes="pipeline-help",
212
  )
@@ -238,5 +314,8 @@ with gr.Blocks() as submit:
238
  outputs=[progess_bar, collection_output_files],
239
  )
240
 
 
 
 
241
  # TODO: validate yaml before submitting...?
242
  # TODO: Add toast gr.Warning: Lose previous run...
 
5
  from htrflow.pipeline.pipeline import Pipeline
6
  from htrflow.pipeline.steps import init_step
7
  import os
8
+ import logging
9
  from htrflow.volume.volume import Collection
10
 
11
  from htrflow.pipeline.steps import auto_import
12
  import yaml
13
 
14
+ logger = logging.getLogger(__name__)
15
+
16
+ # Max number of images a user can upload at once
17
+ MAX_IMAGES = int(os.environ.get("MAX_IMAGES", 5))
18
+
19
+ # Example pipelines
20
  PIPELINES = {
21
  "Running text (Swedish)": {
22
  "file": "app/assets/templates/2_nested.yaml",
23
  "description": "This pipeline works well on documents with multiple text regions.",
24
+ "examples": [
25
+ "R0003364_00005.jpg",
26
+ "30002027_00008.jpg",
27
+ "A0070302_00201.jpg",
28
+ ]
29
  },
30
+ "Letters and snippets (Swedish)": {
31
  "file": "app/assets/templates/1_simple.yaml",
32
  "description": "This pipeline works well on letters and other documents with only one text region.",
33
+ "examples": [
34
+ "451511_1512_01.jpg",
35
+ "A0062408_00006.jpg",
36
+ "C0000546_00085_crop.png",
37
+ "A0073477_00025.jpg",
38
+ ]
39
  },
40
  }
41
 
42
+ # Setup the cache directory to point to the directory where the example images
43
+ # are located. The images must lie in the cache directory because otherwise they
44
+ # have to be reuploaded when drag-and-dropped to the input image widget.
45
+ GRADIO_CACHE = ".gradio_cache"
46
+ EXAMPLES_DIRECTORY = os.path.join(GRADIO_CACHE, "examples")
47
+
48
+ if os.environ.get("GRADIO_CACHE_DIR", GRADIO_CACHE) != GRADIO_CACHE:
49
+ logger.warning(
50
+ "Setting GRADIO_CACHE_DIR to '%s' (overriding a previous value)."
51
+ )
52
+
53
 
54
  class PipelineWithProgress(Pipeline):
55
  @classmethod
 
195
  return sorted(exported_files)
196
 
197
 
198
+ def get_pipeline_description(pipeline: str) -> str:
199
+ """
200
+ Get the description of the given pipeline
201
+ """
202
  return PIPELINES[pipeline]["description"]
203
 
204
 
205
+ def get_yaml(pipeline: str) -> str:
206
+ """
207
+ Get the yaml file for the given pipeline
208
+
209
+ Args:
210
+ pipeline: Name of pipeline (must be a key in the PIPELINES dictionary)
211
+ """
212
  with open(PIPELINES[pipeline]["file"], "r") as f:
213
  pipeline = f.read()
214
  return pipeline
215
 
216
 
217
+ def all_example_images() -> list[str]:
218
+ """
219
+ Get paths to all example images.
220
+ """
221
+ examples = []
222
+ for pipeline in PIPELINES.values():
223
+ for example in pipeline.get("examples", []):
224
+ examples.append(os.path.join(EXAMPLES_DIRECTORY, example))
225
+ return examples
226
+
227
+
228
+ def get_selected_example_image(event: gr.SelectData) -> str:
229
+ """
230
+ Get path to the selected example image.
231
+ """
232
+ return [event.value["image"]["path"]]
233
+
234
+
235
+ def get_selected_example_pipeline(event: gr.SelectData) -> str | None:
236
+ """
237
+ Get the name of the pipeline that corresponds to the selected image.
238
+ """
239
+ for name, details in PIPELINES.items():
240
+ if event.value["image"]["orig_name"] in details.get("examples", []):
241
+ return name
242
+
243
+
244
  with gr.Blocks() as submit:
245
  collection_submit_state = gr.State()
246
+
247
+ with gr.Group():
248
+ with gr.Row(equal_height=True):
249
+ batch_image_gallery = gr.Gallery(
250
+ file_types=["image"],
251
+ label="Image to transcribe",
252
+ interactive=True,
253
+ object_fit="scale-down",
254
+ scale=3,
255
+ preview=True
256
+ )
257
+
258
+ examples = gr.Gallery(
259
+ all_example_images(),
260
+ label="Examples",
261
+ interactive=False,
262
+ allow_preview=False,
263
+ object_fit="scale-down",
264
+ min_width=250,
265
+ )
266
 
267
  with gr.Column(variant="panel", elem_classes="pipeline-panel"):
268
  gr.HTML("Pipeline", elem_classes="pipeline-header", padding=False)
 
272
  PIPELINES, container=False, min_width=240, scale=0, elem_classes="pipeline-dropdown"
273
  )
274
  pipeline_description = gr.HTML(
275
+ value=get_pipeline_description, inputs=pipeline_dropdown, elem_classes="pipeline-description", padding=False
276
  )
277
 
278
  with gr.Group():
 
280
  custom_template_yaml = gr.Code(
281
  value=get_yaml, inputs=pipeline_dropdown, language="yaml", container=False
282
  )
283
+ url = "https://ai-riksarkivet.github.io/htrflow/latest/getting_started/pipeline.html#example-pipelines"
284
  gr.HTML(
285
+ f'See the <a href="{url}">documentation</a> for a detailed description on how to customize HTRflow pipelines.',
286
  padding=False,
287
  elem_classes="pipeline-help",
288
  )
 
314
  outputs=[progess_bar, collection_output_files],
315
  )
316
 
317
+ examples.select(get_selected_example_image, None, batch_image_gallery)
318
+ examples.select(get_selected_example_pipeline, None, pipeline_dropdown)
319
+
320
  # TODO: validate yaml before submitting...?
321
  # TODO: Add toast gr.Warning: Lose previous run...