deepdoc use GPU if possible (#4618)

### What problem does this PR solve?

Make deepdoc use the GPU when one is available, falling back to the CPU otherwise. Both `ocr.py` and `recognizer.py` switch session creation to onnxruntime's CUDA execution provider and thread an `ort.RunOptions` through every `run()` call so that GPU memory is shrunk after execution (see the sketch below).

### Type of change

- [x] Refactoring

Files changed:

- deepdoc/vision/ocr.py (+23 −7)
- deepdoc/vision/recognizer.py (+20 −3)
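Both files adopt the same pattern: probe the installed onnxruntime build with `ort.get_device()`, and on a CUDA-enabled build create the session on `CUDAExecutionProvider` with a capped memory arena, otherwise fall back to `CPUExecutionProvider`. The following is a minimal self-contained sketch of that pattern, not the PR's exact code: the function name is illustrative, and it passes session options via onnxruntime's standard `sess_options` keyword.

```python
import logging
import onnxruntime as ort

def make_session(model_file_path: str) -> tuple[ort.InferenceSession, ort.RunOptions]:
    # Session-level options, as in the diff.
    options = ort.SessionOptions()
    options.enable_cpu_mem_arena = False

    # RunOptions is threaded through every run() call so the allocator can
    # shrink its arena after each inference (see the onnxruntime issue
    # linked in the diff).
    run_options = ort.RunOptions()

    if ort.get_device() == "GPU":  # "GPU" means a CUDA-enabled onnxruntime build
        cuda_provider_options = {
            "device_id": 0,                          # pin to a specific GPU
            "gpu_mem_limit": 512 * 1024 * 1024,      # cap the CUDA arena at 512 MiB
            "arena_extend_strategy": "kNextPowerOfTwo",
        }
        sess = ort.InferenceSession(
            model_file_path,
            sess_options=options,
            providers=["CUDAExecutionProvider"],
            provider_options=[cuda_provider_options],
        )
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
        logging.info("session uses GPU")
    else:
        sess = ort.InferenceSession(
            model_file_path,
            sess_options=options,
            providers=["CPUExecutionProvider"],
        )
        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
        logging.info("session uses CPU")
    return sess, run_options
```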
#### deepdoc/vision/ocr.py

```diff
@@ -14,6 +14,7 @@
 # limitations under the License.
 #
 
+import logging
 import copy
 import time
 import os
@@ -75,17 +76,32 @@ def load_model(model_dir, nm):
     options.execution_mode = ort.ExecutionMode.ORT_SEQUENTIAL
     options.intra_op_num_threads = 2
     options.inter_op_num_threads = 2
-    if False and ort.get_device() == "GPU":
+
+    # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+    # Shrink GPU memory after execution
+    run_options = ort.RunOptions()
+    if ort.get_device() == "GPU":
+        cuda_provider_options = {
+            "device_id": 0,  # Use specific GPU
+            "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
+            "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+        }
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
-            providers=['CUDAExecutionProvider'])
+            providers=['CUDAExecutionProvider'],
+            provider_options=[cuda_provider_options]
+            )
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+        logging.info(f"TextRecognizer {nm} uses GPU")
     else:
         sess = ort.InferenceSession(
             model_file_path,
             options=options,
             providers=['CPUExecutionProvider'])
-    return sess, sess.get_inputs()[0]
+        run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+        logging.info(f"TextRecognizer {nm} uses CPU")
+    return sess, sess.get_inputs()[0], run_options
 
 
 class TextRecognizer(object):
@@ -98,7 +114,7 @@ class TextRecognizer(object):
             "use_space_char": True
         }
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor = load_model(model_dir, 'rec')
+        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'rec')
 
     def resize_norm_img(self, img, max_wh_ratio):
         imgC, imgH, imgW = self.rec_image_shape
@@ -344,7 +360,7 @@ class TextRecognizer(object):
         input_dict[self.input_tensor.name] = norm_img_batch
         for i in range(100000):
             try:
-                outputs = self.predictor.run(None, input_dict)
+                outputs = self.predictor.run(None, input_dict, self.run_options)
                 break
             except Exception as e:
                 if i >= 3:
@@ -383,7 +399,7 @@ class TextDetector(object):
                               "unclip_ratio": 1.5, "use_dilation": False, "score_mode": "fast", "box_type": "quad"}
 
         self.postprocess_op = build_post_process(postprocess_params)
-        self.predictor, self.input_tensor = load_model(model_dir, 'det')
+        self.predictor, self.input_tensor, self.run_options = load_model(model_dir, 'det')
 
         img_h, img_w = self.input_tensor.shape[2:]
         if isinstance(img_h, str) or isinstance(img_w, str):
@@ -456,7 +472,7 @@ class TextDetector(object):
         input_dict[self.input_tensor.name] = img
         for i in range(100000):
             try:
-                outputs = self.predictor.run(None, input_dict)
+                outputs = self.predictor.run(None, input_dict, self.run_options)
                 break
             except Exception as e:
                 if i >= 3:
```
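The `RunOptions` returned by `load_model` is then passed to every `predictor.run(...)` call, so the arena shrinkage actually takes effect on each inference. A short usage sketch of that run-side half of the change follows; the model path and input shape are hypothetical, for illustration only.

```python
import numpy as np
import onnxruntime as ort

# Hypothetical recognition model; any ONNX model file would do here.
sess = ort.InferenceSession("rec.onnx", providers=["CPUExecutionProvider"])

# Ask the allocator to release unused arena chunks after each run;
# use "gpu:0" instead of "cpu" when the session runs on the CUDA provider.
run_options = ort.RunOptions()
run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")

input_name = sess.get_inputs()[0].name
batch = np.zeros((1, 3, 48, 320), dtype=np.float32)  # shape depends on the model
outputs = sess.run(None, {input_name: batch}, run_options)
```

Without the shrinkage entry, the arena keeps its high-water mark between batches, which is exactly the memory growth the linked onnxruntime issue describes.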
#### deepdoc/vision/recognizer.py

```diff
@@ -60,12 +60,29 @@ class Recognizer(object):
         if not os.path.exists(model_file_path):
             raise ValueError("not find model file path {}".format(
                 model_file_path))
-        if False and ort.get_device() == "GPU":
+        # https://github.com/microsoft/onnxruntime/issues/9509#issuecomment-951546580
+        # Shrink GPU memory after execution
+        self.run_options = ort.RunOptions()
+
+        if ort.get_device() == "GPU":
             options = ort.SessionOptions()
             options.enable_cpu_mem_arena = False
-            self.ort_sess = ort.InferenceSession(model_file_path, options=options, providers=['CUDAExecutionProvider'])
+            cuda_provider_options = {
+                "device_id": 0,  # Use specific GPU
+                "gpu_mem_limit": 512 * 1024 * 1024,  # Limit gpu memory
+                "arena_extend_strategy": "kNextPowerOfTwo",  # gpu memory allocation strategy
+            }
+            self.ort_sess = ort.InferenceSession(
+                model_file_path, options=options,
+                providers=['CUDAExecutionProvider'],
+                provider_options=[cuda_provider_options]
+                )
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "gpu:0")
+            logging.info(f"Recognizer {task_name} uses GPU")
         else:
             self.ort_sess = ort.InferenceSession(model_file_path, providers=['CPUExecutionProvider'])
+            self.run_options.add_run_config_entry("memory.enable_memory_arena_shrinkage", "cpu")
+            logging.info(f"Recognizer {task_name} uses CPU")
         self.input_names = [node.name for node in self.ort_sess.get_inputs()]
         self.output_names = [node.name for node in self.ort_sess.get_outputs()]
         self.input_shape = self.ort_sess.get_inputs()[0].shape[2:4]
@@ -454,7 +471,7 @@ class Recognizer(object):
         inputs = self.preprocess(batch_image_list)
         logging.debug("preprocess")
         for ins in inputs:
-            bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names})[0], ins, thr)
+            bb = self.postprocess(self.ort_sess.run(None, {k:v for k,v in ins.items() if k in self.input_names}, self.run_options)[0], ins, thr)
             res.append(bb)
 
         #seeit.save_results(image_list, res, self.label_list, threshold=thr)
```
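One caveat worth knowing when testing this change: `ort.get_device()` reports what the installed onnxruntime *build* supports, not whether CUDA is usable at runtime. If the CUDA libraries are missing, onnxruntime typically logs a warning and falls back to the CPU provider, so inspecting the session afterwards is a quick sanity check. A small verification sketch, with a hypothetical model path:

```python
import onnxruntime as ort

sess = ort.InferenceSession(
    "det.onnx",  # hypothetical model file
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
# get_providers() lists the providers actually activated for this session;
# seeing 'CUDAExecutionProvider' first confirms the GPU is really in use.
print(sess.get_providers())
```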