# For torchvision models, input images are loaded in to a range of [0, 1]
# and normalized using mean = [0.485, 0.456, 0.406] and
# stddev = [0.229, 0.224, 0.225].
def preprocess(image):
    """Normalize an HWC RGB image and return it as a float32 CHW array.

    image: anything ``np.asarray`` accepts (e.g. a PIL Image or ndarray)
        laid out as height x width x 3, with values in [0, 255].
    Returns a (3, H, W) float32 array scaled to [0, 1] and normalized
    with the standard torchvision ImageNet statistics.
    """
    mean = np.array([0.485, 0.456, 0.406]).astype('float32')
    stddev = np.array([0.229, 0.224, 0.225]).astype('float32')
    # Scale to [0, 1] (255.0 is already a float literal; the original's
    # extra float() cast was redundant), then normalize per channel via
    # broadcasting over the last (channel) axis.
    data = (np.asarray(image).astype('float32') / 255.0 - mean) / stddev
    # Switch from HWC to CHW order (channels first, as the model expects).
    return np.moveaxis(data, 2, 0)
def postprocess(data):
    """Turn a (H, W) array of per-pixel class ids into a palettized image.

    Each of the 21 Pascal-VOC segmentation classes is assigned a distinct
    color, so the returned 'P'-mode PIL image is directly viewable.
    """
    NUM_CLASSES = 21
    # Base palette; multiplying by the class index and wrapping mod 255
    # produces a distinct pseudo-random RGB triple per class.
    base = np.array([2 ** 25 - 1, 2 ** 15 - 1, 2 ** 21 - 1])
    class_colors = [base * class_id % 255 for class_id in range(NUM_CLASSES)]
    color_table = np.array(class_colors).astype("uint8")
    # 'P' mode = 8-bit palettized: each pixel value indexes the palette.
    result = Image.fromarray(data.astype('uint8'), mode='P')
    result.putpalette(color_table)
    return result
现在,准备使用TensorRT的原生API进行推理了。
先定义将导出的TensorRT engine加载进来的函数,如下:
def load_engine(engine_file_path):
    """Deserialize a TensorRT engine from a file on disk.

    engine_file_path: path to a serialized engine file.
    Returns the deserialized ICudaEngine.
    Raises FileNotFoundError if the file does not exist. (The original
    used ``assert``, which is silently stripped under ``python -O`` —
    input validation must use a real exception.)
    """
    if not os.path.exists(engine_file_path):
        raise FileNotFoundError(
            "Engine file not found: {}".format(engine_file_path))
    print("Reading engine from file {}".format(engine_file_path))
    # Open the file and the trt.Runtime as context managers so both are
    # released promptly even if deserialization fails.
    with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
        return runtime.deserialize_cuda_engine(f.read())
def infer(engine, input_file, output_file):
    """Run single-image semantic-segmentation inference with TensorRT.

    engine: a deserialized TensorRT ICudaEngine whose input binding is
        named "input" and was built with a dynamic H x W profile.
    input_file: path of the RGB image to segment.
    output_file: path where the colorized PPM mask is written.
    """
    print("Reading input image from file {}".format(input_file))
    with Image.open(input_file) as img:
        input_image = preprocess(img)
        image_width = img.width
        image_height = img.height
        print(input_image.shape, image_height, image_width)

    with engine.create_execution_context() as context:
        # Set input shape based on image dimensions for inference.
        context.set_binding_shape(engine.get_binding_index("input"),
                                  (1, 3, image_height, image_width))
        # Allocate host and device buffers.
        bindings = []
        for binding in engine:
            print('binding: ', binding)
            binding_idx = engine.get_binding_index(binding)
            size = trt.volume(context.get_binding_shape(binding_idx))
            dtype = trt.nptype(engine.get_binding_dtype(binding))
            if engine.binding_is_input(binding):
                # BUGFIX: the body of this branch and the `else:` keyword
                # were missing, leaving a dangling `if` (a SyntaxError).
                # Restored from the NVIDIA TensorRT quick-start
                # semantic-segmentation sample this code is based on.
                input_buffer = np.ascontiguousarray(input_image)
                input_memory = cuda.mem_alloc(input_image.nbytes)
                bindings.append(int(input_memory))
            else:
                # Page-locked host memory is required for truly
                # asynchronous device<->host copies.
                output_buffer = cuda.pagelocked_empty(size, dtype)
                output_memory = cuda.mem_alloc(output_buffer.nbytes)
                bindings.append(int(output_memory))

        stream = cuda.Stream()
        # Transfer input data to the GPU.
        cuda.memcpy_htod_async(input_memory, input_buffer, stream)
        # Run inference
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        # Transfer prediction output from the GPU.
        cuda.memcpy_dtoh_async(output_buffer, output_memory, stream)
        # Synchronize the stream so output_buffer is fully populated.
        stream.synchronize()

    # The flat output is one class id per pixel; reshape to the image
    # plane and colorize it.
    with postprocess(np.reshape(output_buffer, (image_height, image_width))) as img:
        print("Writing output image to file {}".format(output_file))
        img.convert('RGB').save(output_file, "PPM")
最后,使用测试图片执行推理:
# according to : https://github.com/NVIDIA/TensorRT/issues/2052
# Load the serialized engine, run inference, then delete the engine so
# the GPU memory it holds is released as soon as we are done with it.
engine = load_engine(engine_file)
infer(engine, input_file, output_file)
del engine