zhichyu committed
Commit ad4b91a · 1 Parent(s): 6889379

deepdoc use GPU if possible (#4618)

### What problem does this PR solve?

Make deepdoc use the GPU when one is available, instead of always falling back to CPU inference.

### Type of change

- [x] Refactoring
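
For reference, the pattern this PR applies in both files can be distilled into the standalone sketch below. This is an illustrative sketch, not the committed code: `make_session` and `model_path` are hypothetical names, and it uses `sess_options=`, the keyword the `onnxruntime.InferenceSession` constructor documents (the diffs below pass `options=`).

```python
import logging

import onnxruntime as ort


def make_session(model_path: str):
    """Minimal sketch: build an ORT session on the GPU when the build supports it, else CPU."""
    opts = ort.SessionOptions()
    opts.intra_op_num_threads = 2
    opts.inter_op_num_threads = 2

    # Ask ORT to shrink the memory arena back down after every run();
    # see https://github.com/microsoft/onnxruntime/issues/9509
    run_options = ort.RunOptions()

    if ort.get_device() == "GPU":
        cuda_opts = {
            "device_id": 0,                      # pin to GPU 0
            "gpu_mem_limit": 512 * 1024 * 1024,  # cap the CUDA arena at 512 MiB
            "arena_extend_strategy": "kNextPowerOfTwo",
        }
        sess = ort.InferenceSession(
            model_path, sess_options=opts,
            providers=["CUDAExecutionProvider"],
            provider_options=[cuda_opts])
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
    else:
        sess = ort.InferenceSession(
            model_path, sess_options=opts,
            providers=["CPUExecutionProvider"])
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")

    logging.info("providers in use: %s", sess.get_providers())
    return sess, sess.get_inputs()[0], run_options
```

Every inference call then threads the run options through, e.g. `sess.run(None, input_dict, run_options)`, which is exactly what the diffs below do.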

deepdoc/vision/ocr.py CHANGED
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 
+import logging
 import copy
 import time
 import os
@@ -75,17 +76,32 @@ def load_model(model_dir, nm):
     options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
     options.intra_op_num_threads = 2
     options.inter_op_num_threads = 2
-    if False and ort.get_device() == "GPU":
+
+    # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+    # Shrink GPU memory after execution
+    run_options = ort.RunOptions()
+    if ort.get_device() == "GPU":
+        cuda_provider_options = {
+            "device_id": 0,  # Use specific GPU
+            "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
+            "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+        }
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
-            providers=['CUDAExecutionProvider'])
+            providers=['CUDAExecutionProvider'],
+            provider_options=[cuda_provider_options]
+        )
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+        logging.info(f"TextRecognizer {nm} uses GPU")
     else:
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
             providers=['CPUExecutionProvider'])
-    return sess, sess.get_inputs()[0]
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+        logging.info(f"TextRecognizer {nm} uses CPU")
+    return sess, sess.get_inputs()[0], run_options
 
 
 class TextRecognizer(object):
@@ -98,7 +114,7 @@ class TextRecognizer(object):
             "use_space_char": True
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor = load_model(model_dir, 'rec')
+        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
 
     def resize_norm_img(self, img, max_wh_ratio):
        imgC, imgH, imgW = self.rec_image_shape
@@ -344,7 +360,7 @@ class TextRecognizer(object):
             input_dict[self.input_tensor.name] = norm_img_batch
             for i in range(100000):
                 try:
-                    outputs = self.predictor.run(None, input_dict)
+                    outputs = self.predictor.run(None, input_dict, self.run_options)
                     break
                 except Exception as e:
                     if i >= 3:
@@ -383,7 +399,7 @@ class TextDetector(object):
             "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
 
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor = load_model(model_dir, 'det')
+        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
 
         img_h, img_w = self.input_tensor.shape[2:]
         if isinstance(img_h, str) or isinstance(img_w, str):
@@ -456,7 +472,7 @@ class TextDetector(object):
         input_dict[self.input_tensor.name] = img
         for i in range(100000):
             try:
-                outputs = self.predictor.run(None, input_dict)
+                outputs = self.predictor.run(None, input_dict, self.run_options)
                 break
             except Exception as e:
                 if i >= 3:
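
A caveat worth keeping in mind when reviewing this (an observation about onnxruntime, not something the PR changes): `ort.get_device()` reports what the installed onnxruntime build was compiled for, not whether a usable GPU is actually present, and on a GPU build whose CUDA runtime libraries fail to load, session creation typically falls back to CPU with only a logged error. A quick way to see what actually got activated:

```python
import onnxruntime as ort

print(ort.get_device())               # "GPU" for a CUDA build, "CPU" otherwise
print(ort.get_available_providers())  # providers this build can offer

# After constructing a session, get_providers() lists the providers that
# were really activated, so a silent CPU fallback shows up here.
# ("model.onnx" is a placeholder path.)
sess = ort.InferenceSession(
    "model.onnx",
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"])
print(sess.get_providers())
```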
deepdoc/vision/recognizer.py CHANGED
@@ -60,12 +60,29 @@ class Recognizer(object):
         if not os.path.exists(model_file_path):
             raise ValueError("not find model file path {}".format(
                 model_file_path))
-        if False and ort.get_device() == "GPU":
+        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+        # Shrink GPU memory after execution
+        self.run_options = ort.RunOptions()
+
+        if ort.get_device() == "GPU":
             options = ort.SessionOptions()
             options.enable_cpu_mem_arena = False
-            self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=[('CUDAExecutionProvider')])
+            cuda_provider_options = {
+                "device_id": 0,  # Use specific GPU
+                "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
+                "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+            }
+            self.ort_sess = ort.InferenceSession(
+                model_file_path, options=options,
+                providers=['CUDAExecutionProvider'],
+                provider_options=[cuda_provider_options]
+            )
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+            logging.info(f"Recognizer {task_name} uses GPU")
         else:
             self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+            logging.info(f"Recognizer {task_name} uses CPU")
         self.input_names = [node.name for node in self.ort_sess.get_inputs()]
         self.output_names = [node.name for node in self.ort_sess.get_outputs()]
         self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
@@ -454,7 +471,7 @@ class Recognizer(object):
         inputs = self.preprocess(batch_image_list)
         logging.debug("preprocess")
         for ins in inputs:
-            bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
+            bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names}, self.run_options)[0], ins, thr)
             res.append(bb)
 
         #seeit.save_results(image_list, res, self.label_list, threshold=thr)
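
One design note on the chosen provider options (background on onnxruntime, not from the PR itself): `gpu_mem_limit` is in bytes and caps only the CUDA execution provider's memory arena (512 MiB here), not the process's total GPU usage, and `kNextPowerOfTwo` extends the arena in power-of-two increments. If that growth pattern is too coarse, the provider also accepts `kSameAsRequested`:

```python
# Hypothetical tuning alternative, not what the PR ships:
cuda_provider_options = {
    "device_id": 0,
    "gpu_mem_limit": 512 * 1024 * 1024,           # bytes; caps the EP arena only
    "arena_extend_strategy": "kSameAsRequested",  # grow by exactly what is asked for
}
```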