python - TensorRT 没有使用 float16 (或如何检查?)
问题描述
我强烈怀疑 precision_mode='FP16' 实际上没有起任何作用(tf 1.15)。.pb 文件的大小没有改变;不过读了这个问题之后,我了解到权重可能仍以 float32 保存,而 float16 只用于计算,于是我尝试检查张量的类型。
这里我们创建 keras 模型
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import backend as K
import numpy as np
from tensorflow.python.platform import gfile
from tensorflow.python.framework import graph_io
# Build a small two-layer convolutional model. The two leading entries of the
# input shape are left as None (unspecified); the last dimension is fixed at 3.
image_in = keras.layers.Input(shape=(None, None, 3))
features = keras.layers.Conv2D(64, 3, padding='same')(image_in)
image_out = keras.layers.Conv2D(3, 3, padding='same')(features)

model = keras.Model([image_in], [image_out])
model.compile(optimizer='adam', loss='mse')

# Record the tensor names of the model's single input and single output and
# show them; the string literal below holds the output of the two prints.
input_name, output_name = model.inputs[0].name, model.outputs[0].name
print(input_name)
print(output_name)
'''
input_1:0
conv2d_1/BiasAdd:0
'''
# -------------------- SAVING
# Freeze the Keras model (variables are converted to constants) and write the
# resulting GraphDef to 'model.pb' in binary form.
sess = K.get_session()

# model.outputs[0].name is a tensor name of the form "<node>:<index>"; keep
# only the node-name part, which is what output_node_names is given below.
output_name = output_name.split(":")[0]

with sess.graph.as_default() as graph:
    input_graph_def = graph.as_graph_def()
    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess,  # The session
        input_graph_def,  # input_graph_def is useful for retrieving the nodes
        output_node_names=[output_name],
    )

# write the graph
graph_io.write_graph(output_graph_def, '', 'model.pb', as_text=False)
然后用 precision_mode='FP16' 按如下方式将其转换为 TensorRT 图:
import tensorflow as tf
#from tensorflow.contrib import tensorrt as trt
from tensorflow.python.compiler.tensorrt import trt_convert as trt
# Declares a boolean flag named 'use_float16' with default True. Nothing in
# this snippet reads the flag; the precision is passed directly to the
# converter call further down.
tf.flags.DEFINE_bool('use_float16', True, 'Whether we want to quantize it to float16.')
# Node name (no ":0" tensor suffix) handed to the converter as the graph output.
output_names = ['conv2d_1/BiasAdd']
def load_graph(file):
    """Read a binary GraphDef file and import it into a fresh graph.

    Args:
        file: Path of a serialized (binary protobuf) ``GraphDef`` file.

    Returns:
        A ``(graph, graph_def)`` tuple: the new ``tf.Graph`` holding the
        imported nodes, and the parsed ``GraphDef`` itself.
    """
    graph_def = tf.GraphDef()
    with tf.gfile.GFile(file, 'rb') as f:
        graph_def.ParseFromString(f.read())

    with tf.Graph().as_default() as graph:
        # No name= argument is passed, so the imported nodes receive
        # TensorFlow's default "import/" name prefix inside `graph`.
        # The returned graph_def keeps the original, unprefixed node names.
        tf.import_graph_def(graph_def)
    return graph, graph_def
graph, graph_def = load_graph('model.pb')

# The model was built with unspecified height and width. In static mode
# (is_dynamic_op=False, the default) TF-TRT needs fully defined input shapes
# to build an engine during conversion; otherwise the segment is left as
# native TensorFlow ops. is_dynamic_op=True defers engine building to run
# time, when the concrete input shape is known.
tensorrt_graph = trt.create_inference_graph(
    graph_def,
    outputs=output_names,
    max_batch_size=1,
    precision_mode='FP16',
    is_dynamic_op=True,
)

# Report whether any part of the graph was actually replaced by a TensorRT
# engine; zero means the saved graph is still plain TensorFlow.
num_trt_engines = sum(node.op == 'TRTEngineOp' for node in tensorrt_graph.node)
print('TRTEngineOp nodes in converted graph: %d' % num_trt_engines)

with tf.gfile.GFile('trt_model.pb', 'wb') as f:
    f.write(tensorrt_graph.SerializeToString())
转换日志:
2020-10-21 15:54:14.659757: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1304] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 3693 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1050, pci bus id: 0000:01:00.0, compute capability: 6.1)
2020-10-21 15:54:14.661494: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x562666640c80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2020-10-21 15:54:14.661507: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): GeForce GTX 1050, Compute Capability 6.1
2020-10-21 15:54:14.669536: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:786] Optimization results for grappler item: tf_graph
2020-10-21 15:54:14.669560: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788] constant_folding: Graph size after: 9 nodes (-4), 8 edges (-4), time = 1.469ms.
2020-10-21 15:54:14.669569: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788] layout: Graph size after: 13 nodes (4), 12 edges (4), time = 0.588ms.
2020-10-21 15:54:14.669575: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788] constant_folding: Graph size after: 13 nodes (0), 12 edges (0), time = 1.32ms.
2020-10-21 15:54:14.669582: I tensorflow/core/grappler/optimizers/meta_optimizer.cc:788] constant_folding: Graph size after: 13 nodes (0), 12 edges (0), time = 0.784ms.
然后加载转换后的模型,并打印张量的类型:
from PIL import Image
import numpy as np
import tensorflow as tf
from tensorflow.core.framework import types_pb2, graph_pb2, attr_value_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from google.protobuf import text_format
#tf.flags.DEFINE_bool('use_float16', True, 'Whether we want to quantize it to float16.')
def load_graph(model_path):
    """Load a GraphDef from disk and open a session on it.

    Args:
        model_path: Path of the model file. A path ending in "pb" is parsed
            as a binary ``GraphDef``; any other path is parsed as a
            text-format ``GraphDef``.

    Returns:
        A ``(sess, graph)`` tuple: a ``tf.Session`` bound to the new graph,
        and the ``tf.Graph`` itself.
    """
    graph_def = tf.GraphDef()
    if model_path.endswith("pb"):
        with open(model_path, "rb") as f:
            graph_def.ParseFromString(f.read())
    else:
        with open(model_path, "r") as pf:
            text_format.Parse(pf.read(), graph_def)

    graph = tf.Graph()
    with graph.as_default():
        # name="" imports the nodes without a prefix, so tensors can be
        # looked up by their original names (e.g. 'input_1:0').
        tf.import_graph_def(graph_def, name="")
    sess = tf.Session(graph=graph)
    return sess, graph
sess, graph = load_graph('trt_model.pb')
input_name = 'input_1:0'
output_name = 'conv2d_1/BiasAdd:0'
print('---------------Done---------------')

# List the TensorRT engine nodes and the precision each was built with.
# NOTE(review): the dtype of the graph's input/output tensors printed further
# down describes the TensorFlow tensors, not the precision used inside an
# engine; the 'precision_mode' attribute of each TRTEngineOp is the value to
# check. Zero nodes here means nothing was converted to TensorRT.
trt_nodes = [node for node in graph.as_graph_def().node if node.op == 'TRTEngineOp']
print('TRTEngineOp nodes: %d' % len(trt_nodes))
for node in trt_nodes:
    print(node.name, node.attr['precision_mode'].s)

# Test model
test_img_orig = Image.open('test.jpg').convert('RGB')
# The input placeholder is float32, so feed float32: casting the image to
# float16 first only rounds the input without changing how the graph runs.
test_img_orig = (np.array(test_img_orig) / 255.).astype(np.float32)

input_tensor = sess.graph.get_tensor_by_name(input_name)
output_tensor = sess.graph.get_tensor_by_name(output_name)
print(input_tensor)
print(output_tensor)

# Add a leading batch axis of size 1 before feeding the image.
output = sess.run(output_tensor, {input_name: test_img_orig[np.newaxis, ...]})

# Printed again after the run to show the tensor descriptions are unchanged.
print(sess.graph.get_tensor_by_name(input_name))
print(sess.graph.get_tensor_by_name(output_name))
结果是
Tensor("input_1:0", shape=(?, ?, ?, 3), dtype=float32)
Tensor("conv2d_1/BiasAdd:0", shape=(?, ?, ?, 3), dtype=float32)
这意味着模型仍然是 float32。如何使用 TensorRT 将我的模型量化为 float16?
解决方案
推荐阅读
- javascript - 当屏幕尺寸超过一定宽度尺寸时如何运行javascript?
- apache-spark - 资源匮乏下 Kubernetes 上的 Spark 作业无限期等待 SPARK_MIN_EXECUTORS
- c# - 是否允许 Native App 客户端在重定向 URI 中使用任何随机可用端口?
- reactjs - 为什么无论状态是对象还是字符串,React 都会以不同的方式重新渲染?
- javascript - 如何将所有导入的模块存储在一个文件中
- java - 如何限制创建的线程数并等待主线程,直到任何一个线程找到答案?
- docker - 如何有效地为 2 个远程节点使用 Kubernetes
- java - 为什么在 Java 中的类型转换期间允许在右侧使用(已擦除)泛型类型?
- amazon-web-services - 为什么使用 AWS Kinesis Firehose 在 Amazon AWS ElasticSearch 前面
- amazon-s3 - 有没有办法从 SFTP 将文件上传到 Amazon S3