python-3.x - 如何解析 TensorRT INT8 校准生成的 .bin 文件?
问题描述
我编写了一个 Python 脚本,使用 TensorRT 对 TinyYOLO V2 各层激活值的动态范围(缩放因子)进行 INT8 校准。该脚本生成了一个名为 calibration_cache.bin 的文件。如何解析这个 .bin 文件?文件中的各个值代表什么含义?
calibrator.py
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
from PIL import Image
import ctypes
import tensorrt as trt
import os
# Geometry of one calibration image slot; used for the host batch buffer
# shape and as the resize target when images are loaded.
CHANNEL = 3  # channels per image slot in the batch buffer
HEIGHT = 416  # images are resized to this height
WIDTH = 416  # images are resized to this width
class PythonEntropyCalibrator(trt.IInt8EntropyCalibrator):
    """INT8 entropy calibrator that feeds TensorRT from a host batch stream.

    Each batch produced by the stream is copied into a single device buffer
    whose address is handed back to TensorRT. The calibration cache is read
    from and written to ``cache_file``.
    """

    def __init__(self, input_layers, stream, cache_file='calibration_cache.bin'):
        """Set up the calibrator and allocate the device input buffer.

        Args:
            input_layers: sequence whose first element is the input layer name.
            stream: batch source exposing ``batch_size``, ``calibration_data``,
                ``reset()`` and ``next_batch()``.
            cache_file: path of the calibration cache; the default keeps the
                file name that was previously hard-coded.
        """
        trt.IInt8EntropyCalibrator.__init__(self)
        self.input_layers = input_layers
        self.stream = stream
        self.cache_file = cache_file
        # One device allocation sized to the stream's full host batch buffer.
        self.d_input = cuda.mem_alloc(self.stream.calibration_data.nbytes)
        stream.reset()

    def get_batch_size(self):
        """Return the batch size reported by the underlying stream."""
        return self.stream.batch_size

    def get_batch(self, bindings, names):
        """Upload the next batch and return the updated bindings.

        Returns None once the stream yields an empty array, otherwise
        ``bindings`` with its first entry set to the device buffer address.
        """
        batch = self.stream.next_batch()
        if not batch.size:
            return None
        cuda.memcpy_htod(self.d_input, batch)
        # NOTE(review): when input_layers[0] is a string this loop compares
        # names[0] against each single character of that name, so it only
        # fires for one-character names. Left unchanged on purpose; confirm
        # the intended check before tightening it.
        for i in self.input_layers[0]:
            assert names[0] != i
        bindings[0] = int(self.d_input)
        return bindings

    def read_calibration_cache(self, length=0):
        """Return the cache file's bytes, or None when the file does not exist."""
        if os.path.exists(self.cache_file):
            with open(self.cache_file, 'rb') as f:
                return f.read()
        return None

    def write_calibration_cache(self, cache, size=0):
        """Write ``cache`` to the cache file, replacing any previous content."""
        with open(self.cache_file, 'wb') as f:
            f.write(cache)
        return None
class ImageBatchStream():
    """Serve calibration images as fixed-size float32 batches.

    Images are loaded from disk, resized to WIDTH x HEIGHT, passed through
    the supplied preprocessor and copied into one reusable host buffer of
    shape (batch_size, CHANNEL, HEIGHT, WIDTH).
    """

    def __init__(self, batch_size, calibration_files, preprocessor):
        """Prepare the stream.

        Args:
            batch_size: number of images per batch; must be positive.
            calibration_files: sequence of image paths to serve.
            preprocessor: callable applied to each loaded image array; its
                return value is what gets stored in the batch.

        Raises:
            ValueError: if ``batch_size`` is not positive.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")
        self.batch_size = batch_size
        # Ceiling division: a trailing partial batch still counts as a batch.
        self.max_batches = (len(calibration_files) + batch_size - 1) // batch_size
        self.files = calibration_files
        self.calibration_data = np.zeros(
            (batch_size, CHANNEL, HEIGHT, WIDTH), dtype=np.float32)
        self.batch = 0
        self.preprocessor = preprocessor

    @staticmethod
    def read_image_chw(path):
        """Load ``path`` as a float32 array laid out channel-first.

        The image is resized to (WIDTH, HEIGHT) with nearest-neighbour
        sampling, its channel axis is reversed, and the axes are reordered
        from height/width/channel to channel/height/width.
        """
        # The context manager releases the file handle once pixels are decoded.
        with Image.open(path) as img:
            # Force three channels so grayscale/RGBA inputs still fit CHANNEL.
            resized = img.convert('RGB').resize((WIDTH, HEIGHT), Image.NEAREST)
        im = np.array(resized, dtype=np.float32, order='C')
        im = im[:, :, ::-1]  # reverse the channel order
        return im.transpose((2, 0, 1))

    def reset(self):
        """Rewind the stream to the first batch."""
        self.batch = 0

    def next_batch(self):
        """Return the next batch, or an empty array when the stream is exhausted.

        The returned array is built from the internal buffer, so callers
        should consume it before requesting the following batch.
        """
        if self.batch >= self.max_batches:
            return np.array([])

        start = self.batch_size * self.batch
        files_for_batch = self.files[start:start + self.batch_size]

        # Load every image first so a failing file leaves the buffer untouched.
        imgs = []
        for f in files_for_batch:
            print("[ImageBatchStream] Processing ", f)
            img = ImageBatchStream.read_image_chw(f)
            imgs.append(self.preprocessor(img))

        # NOTE(review): for a partial last batch the remaining slots keep the
        # images of the previous batch; confirm whether that is acceptable.
        for slot, img in enumerate(imgs):
            self.calibration_data[slot] = img

        self.batch += 1
        return np.ascontiguousarray(self.calibration_data, dtype=np.float32)
test.py
from random import shuffle
from PIL import Image
import glob
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import os
from calibrator import *
# Logger shared by the TensorRT builder, ONNX parser and refitter,
# created with WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
# Path of the ONNX model that is parsed into the TensorRT network.
model_file = './tiny_yolov2/Model.onnx'
# Glob pattern matching the JPEG images available for calibration.
dataset_loc = './Dataset/*.jpg'
def normalize(data):
    """Return ``data`` divided by 255.0, leaving the input untouched.

    A new array is returned instead of dividing in place, so the caller's
    array is not modified and integer-typed arrays (for which in-place true
    division raises) are accepted as well.

    Args:
        data: array-like of pixel values supporting true division.

    Returns:
        The scaled values; float32 input stays float32.
    """
    return data / 255.0
def create_calibration_dataset(max_files=20, pattern=None):
    """Return a random selection of calibration image paths.

    Args:
        max_files: upper bound on the number of paths returned (default 20).
        pattern: glob pattern to search; when None the module-level
            ``dataset_loc`` pattern is used.

    Returns:
        A list of at most ``max_files`` matching paths in shuffled order.

    Raises:
        ValueError: if ``max_files`` is negative.
    """
    if max_files < 0:
        raise ValueError("max_files must be non-negative")
    # Resolve the default lazily so an explicit pattern never touches dataset_loc.
    calibration_files = glob.glob(dataset_loc if pattern is None else pattern)
    shuffle(calibration_files)
    return calibration_files[:max_files]
# Select the calibration images and wrap them in a batch stream that the
# INT8 calibrator pulls from.
calibration_files = create_calibration_dataset()
NUM_IMAGES_PER_BATCH = 5
batchstream = ImageBatchStream(NUM_IMAGES_PER_BATCH, calibration_files, normalize)
Int8_calibrator = PythonEntropyCalibrator(["conv2d_91_input"], batchstream)

# Configure the builder for a refittable INT8 engine before creating the network.
builder = trt.Builder(TRT_LOGGER)
builder.int8_calibrator = Int8_calibrator
builder.refittable = True
builder.int8_mode = True
network = builder.create_network()
parser = trt.OnnxParser(network, TRT_LOGGER)
print(builder.int8_mode, builder.platform_has_fast_int8, builder.refittable)

# Parse the ONNX model; stop with the parser's own diagnostics on failure
# instead of continuing with an incomplete network.
with open(model_file, 'rb') as model:
    parsed_ok = parser.parse(model.read())
if not parsed_ok:
    for error_index in range(parser.num_errors):
        print(parser.get_error(error_index))
    raise RuntimeError("Failed to parse ONNX model: " + model_file)
print('Done reading ONNX File\n')

# Build the engine; fail explicitly rather than with an AttributeError on
# engine.serialize() when no engine was produced.
engine = builder.build_cuda_engine(network)
if engine is None:
    raise RuntimeError("TensorRT failed to build an engine from " + model_file)
print(engine, TRT_LOGGER)
with open("model.trt", "wb") as f:
    f.write(engine.serialize())
print("Done converting the ONNX to TRT\n")

# Inspect the built engine through a refitter and list the tensors that
# carry a dynamic range.
tinyolo_fitter = trt.Refitter(engine, TRT_LOGGER)
print(tinyolo_fitter.refit_cuda_engine())
print(tinyolo_fitter.get_tensors_with_dynamic_range())
calibration_cache.bin
TRT-5105-EntropyCalibration image: 3c010a14 scalerPreprocessor_scaled: 38018ba0 image2: 38018ba0 batchnormalization_1_output: 3d07b31d leakyrelu_1_output: 3c98a317 maxpooling2d_1_output: 3c1e5b30 batchnormalization_2_output: 3ca6aa67 leakyrelu_2_output: 3ca6aa67 maxpooling2d_2_output: 3c82cf7d batchnormalization_3_output: 3ce07ce8 leakyrelu_3_output: 3ce52236 maxpooling2d_3_output: 3cc8ed6f batchnormalization_4_output: 3d3df55f leakyrelu_4_output: 3c651727 maxpooling2d_4_output: 3cec84fc batchnormalization_5_output: 3d0f51e3 leakyrelu_5_output: 3cb52377 maxpooling2d_5_output: 3d026049 batchnormalization_6_output: 3d387291 leakyrelu_6_output: 3ccc009a maxpooling2d_6_output: 3c8d0f0c batchnormalization_7_output: 3e0de3d2 leakyrelu_7_output: 3d7b4397 batchnormalization_8_output: 3cc459d6 leakyrelu_8_output: 3cbd9562 grid: 3ddc32dc
解决方案
def read_calibration_cache(self, length=0):
    """Return the bytes of the saved calibration cache, if there is one.

    Looks for 'calibration_cache.bin' in the current working directory and
    returns its full content; returns None when the file does not exist.
    """
    cache_path = 'calibration_cache.bin'
    if not os.path.exists(cache_path):
        return None
    with open(cache_path, 'rb') as cache_file:
        return cache_file.read()
我想这样是可行的。如果当前目录中存在 calibration_cache.bin 文件,校准器会直接读取该缓存,而不会重新进行校准。
推荐阅读
- windows - 从以 SYSTEM 身份运行的 Powershell 设置用户环境变量
- java - Android RecyclerView DiffUtil.Callback: areContentsTheSame() 和 getChangePayload() 给出不同的比较结果
- docker - 如何将 docker run 参数添加到 docker compose 文件中?
- javascript - Froala 编辑器插件(列表)按钮未在 React JS(Next JS)中显示
- ionic4 - 在 Ionic Angular 应用程序中打开搜索弹出窗口时自动聚焦输入字段
- c# - Aspose.Cells NuGet 18.3 Worksheet 对象似乎对 .Net Core 有不同的 API
- c# - 这个 C# 泛型约束的作用是什么:public class ValueObject<T> where T : ValueObject<T>
- python - 有没有办法来解决这个问题?
- php - Laravel:在 Blade 模板内调用 paginate()?
- r - 为什么 ``mean`` 函数不能在函数环境中与 ``group_by %>% summarise`` 一起正常工作?