python - TensorFlow:为什么 tf.Dataset.map() 只处理我数据集中的第一个示例?
问题描述
我在 TensorFlow 1.12 中按如下方式使用 tf.Dataset.map():
# Apply parse_example to each element of dataset_train; width, height and
# NUM_CLASSES are captured by the lambda and passed through unchanged.
dataset_train = dataset_train.map(lambda x: parse_example(x, width, height, NUM_CLASSES))
dataset_train 包含 592 个示例,但这一行只处理了其中一个——我在 parse_example() 中递增的全局计数器证明了这一点。为什么它不处理数据集中的所有示例?我是在 eager 模式下运行的,但 .map() 里面的代码并没有以 eager 方式执行。非常感谢任何想法!
---------------------------------------------------------------------------------
作为参考,我的主要功能如下所示:
tf.enable_eager_execution()

# Counter incremented inside parse_example(). It has to be called `v`:
# parse_example() declares `global v` and the script prints `v` below, so
# initialising a differently named variable leads to a NameError.
v = 0

tfrecord_train = "/media/nfs/7_raid/ebos/dataset/material_segmentation_train.record"
dataset_train = tf.data.TFRecordDataset(tfrecord_train)

# Read the image width/height from the first record of the TFRecord file.
iterator = dataset_train.make_one_shot_iterator()
next_element = iterator.get_next()
# Under eager execution, .numpy() on the scalar string tensor gives the
# serialized record as bytes, which Example.FromString can parse directly
# (no need for the deprecated np.fromstring round-trip).
example = tf.train.Example.FromString(next_element.numpy())
height = example.features.feature['image/height'].int64_list.value[0]
width = example.features.feature['image/width'].int64_list.value[0]

dataset_train = dataset_train.map(
    lambda x: parse_example(x, width, height, NUM_CLASSES)
)

# NOTE: per the tf.data documentation, Dataset.map() traces the mapped
# function into a graph even when eager execution is enabled, so Python side
# effects inside parse_example() (such as incrementing `v`) happen when the
# function is traced, not once per dataset element.
print(v)
.map() 中调用的函数如下所示:
def parse_example(example_proto, width, height, num_classes):
    """Parse one serialized example into a dict of decoded tensors.

    Args:
        example_proto: Serialized example, passed to
            ``tf.parse_single_example``.
        width: Image width used when reshaping the RGBD tensor and the masks.
        height: Image height used when reshaping the RGBD tensor and the masks.
        num_classes: Depth of the one-hot labels; forwarded to
            ``masks_to_onehots_tf``.

    Returns:
        The parsed feature dict, where ``'image/encoded'`` and
        ``'image/depth'`` hold the decoded tensors and two entries are added:
        ``'image/rgbd'`` (reshaped to ``[height, width, 4]``) and
        ``'image/labels'`` (the output of ``masks_to_onehots_tf``).

    Side effects:
        Increments the module-level counter ``v`` and prints the static shape
        of the labels tensor.
    """
    features = {
        'image/encoded': tf.FixedLenFeature((), tf.string),
        'image/height': tf.FixedLenFeature((), tf.int64),
        'image/width': tf.FixedLenFeature((), tf.int64),
        'image/filename': tf.FixedLenFeature((), tf.string),
        'image/object/bbox/xmin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/xmax': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymin': tf.VarLenFeature(tf.float32),
        'image/object/bbox/ymax': tf.VarLenFeature(tf.float32),
        'image/object/class/label': tf.VarLenFeature(tf.int64),
        'image/object/class/text': tf.VarLenFeature(tf.string),
        'image/object/mask': tf.VarLenFeature(tf.string),
        'image/depth': tf.FixedLenFeature((), tf.string),
    }

    # Python-level call counter. When this function is passed to
    # Dataset.map() it is traced into a graph (see the tf.data.Dataset.map
    # documentation), so this statement runs when the function is traced
    # rather than once per dataset element.
    global v
    v = v + 1

    parsed_example = tf.parse_single_example(example_proto, features)

    # Decode the JPEG image and store it in place of the encoded bytes.
    image = tf.image.decode_jpeg(parsed_example['image/encoded'])
    parsed_example['image/encoded'] = image

    # Decode the depth map and concatenate it with the float-converted image
    # along the channel axis to form the RGBD tensor.
    depth = utilities.decode_depth(parsed_example['image/depth'])
    parsed_example['image/depth'] = depth
    rgbd = tf.concat([tf.image.convert_image_dtype(image, tf.float32), depth], axis=2)
    rgbd = tf.reshape(rgbd, shape=tf.stack([height, width, 4]))
    parsed_example['image/rgbd'] = rgbd

    # Densify the variable-length list of PNG-encoded masks, decode each one
    # and reshape the result to [num_masks, height, width].
    tag_masks = tf.sparse.to_dense(parsed_example['image/object/mask'], default_value="")
    tag_masks = tf.map_fn(utilities.decode_png_mask, tag_masks, dtype=tf.uint8)
    tag_masks = tf.reshape(tag_masks, shape=tf.stack([-1, height, width]), name='tag_masks')

    # Label-map index of every mask, in the same order as tag_masks.
    tag_class_indices = tf.sparse.to_dense(parsed_example['image/object/class/label'])

    onehots = masks_to_onehots_tf(tag_masks, tag_class_indices, num_classes)
    parsed_example['image/labels'] = onehots
    print(parsed_example['image/labels'].shape)
    return parsed_example
最后,masks_to_onehots_tf() 如下:
def masks_to_onehots_tf(tag_masks, tag_class_indices, num_classes):
    """Build a per-pixel one-hot label tensor from a stack of masks.

    Args:
        tag_masks: Rank-3 mask tensor; it is summed over axes 1 and 2 and
            transposed with ``perm=[1, 2, 0]``, i.e. the mask index is the
            leading axis.
        tag_class_indices: Currently unused (see the review note below).
        num_classes: Depth of each one-hot vector.

    Returns:
        A ``uint8`` tensor with one ``num_classes``-long one-hot vector for
        every position of the last two axes of ``tag_masks``.
    """
    # Sum in int32: the decoded masks are uint8, and a uint8 reduction wraps
    # around, which would make large masks look small or even empty.
    tag_mask_sizes = tf.reduce_sum(tf.cast(tag_masks, tf.int32), axis=[1, 2])

    def onehotify(pixel_tag_masks):
        # NOTE(review): `pixel_tag_masks` is never read, so every pixel gets
        # the same result (the smallest non-empty mask overall). Presumably
        # only the masks covering this pixel should be considered - confirm.
        nonzero_idx = tf.where(tf.not_equal(tag_mask_sizes, 0))
        nonzero_sizes = tf.gather_nd(tag_mask_sizes, nonzero_idx)
        smallest_mask_index = nonzero_idx[tf.argmin(nonzero_sizes)]
        # NOTE(review): this one-hot encodes the mask's position in
        # `tag_masks`, not a class id; `tag_class_indices` looks like it was
        # meant to map the position to its class - confirm.
        return tf.one_hot(smallest_mask_index[0], depth=num_classes, dtype=tf.uint8)

    # Move the mask axis last so the nested map_fn walks the first two axes
    # and hands onehotify the per-position vector of mask values.
    image_masks = tf.transpose(tag_masks, perm=[1, 2, 0])
    onehots = tf.map_fn(lambda row: tf.map_fn(onehotify, row), image_masks)
    return onehots
解决方案
也许您可以试试 map_fn 函数:
https://www.tensorflow.org/api_docs/python/tf/map_fn - 它用于处理张量。
推荐阅读
- python - 使用 NiFi 执行脚本处理器将文件从 aws s3 位置移动到另一个 aws s3 位置
- performance - 当我将切片参数作为值或指针传递时,为什么会有性能差异?
- angular - 如何在离子手势 API 中获取旋转角度?
- python - tf.divide() 和 tf.cast() 将中断我程序的梯度传播。有没有办法解决这个问题?
- microsoft-graph-api - Microsoft Graph API - 邀请端点每次都返回错误请求
- java - 属性上的 Java XML 转换器错误:前缀“req”的命名空间尚未声明
- python - python中URL中带有变量的最快并行请求
- google-sheets - 列出 Google 表格中工作表的名称
- r - 基于字符串拆分R数据框中的文本
- gis - 如何将范围从 EPSG 3857 转换为 EPSG 26331