tensorflow - 将groundtruths映射到TF图像文件
问题描述
我在把真实标签（ground truth）映射为图像训练集的标签时遇到了问题。在我的 Colab 环境中（工作目录为 /content），真实标签包含在文件名里。我想按空格拆分文件名，并用拆分得到的标签来标记张量：
/content/imageset1/ Anger 1000.jpg
/content/imageset1/ Anger 1011.jpg
/content/imageset1/ Anger 1029.jpg
....
/content/imageset1/ Disgust 14608.jpg
我正在为我的 decode_jpeg_and_label 函数而苦苦挣扎
# Glob pattern matching every source JPEG.
GCS_PATTERN = '/content/imageset1/*.jpg'
nb_images = len(tf.io.gfile.glob(GCS_PATTERN))

# Prefix for output file names.
GCS_OUTPUT = BUCKET + '/emotions'

# Images are split into this many batches; shard_size is the resulting
# number of images per batch, rounded up.
BATCHES = 16
shard_size = math.ceil(1.0 * nb_images / BATCHES)

# This is given in the readme; it is used later on.
TARGET_SIZE = [100, 100]

# Ground-truth labels.
CLASSES = [
    'Surprise', 'Fear', 'Disgust', 'Happiness', 'Sadness', 'Anger', 'Neutral',
]
# functions for writing TFRecord entries
# Feature values are always stored as lists, a single data element will be a list of size 1
def _bytestring_feature(list_of_bytestrings):
    """Wrap a list of byte strings in a ``tf.train.Feature``.

    Feature values are always stored as lists, so a single data element is
    passed as a list of size 1.
    """
    bytes_list = tf.train.BytesList(value=list_of_bytestrings)
    return tf.train.Feature(bytes_list=bytes_list)
def _int_feature(list_of_ints):
    """Wrap a list of integers in an int64 ``tf.train.Feature``.

    Feature values are always stored as lists, so a single data element is
    passed as a list of size 1.
    """
    int64_list = tf.train.Int64List(value=list_of_ints)
    return tf.train.Feature(int64_list=int64_list)
def to_tfrecord(tfrec_filewriter, img_bytes, label):
    """Build the ``tf.train.Example`` for one labelled image.

    Args:
        tfrec_filewriter: Not used by this function; kept so existing call
            sites keep working.
        img_bytes: The encoded image as a byte string.
        label: Emotion name, either ``str`` or UTF-8 ``bytes``. String
            tensors converted with ``.numpy()`` yield ``bytes``, which never
            compare equal to the ``str`` entries of ``CLASSES``; they are
            decoded here so the lookup actually matches.

    Returns:
        A ``tf.train.Example`` with an ``"image"`` bytes feature and a
        ``"class"`` int64 feature holding the 0-based index of ``label`` in
        ``CLASSES``.

    Raises:
        ValueError: If ``label`` is not one of ``CLASSES``. Failing loudly
            avoids silently writing such records as class 0.
    """
    if isinstance(label, bytes):
        label = label.decode('utf-8')
    try:
        class_num = CLASSES.index(label)
    except ValueError:
        raise ValueError(
            "Unknown label {!r}; expected one of {}".format(label, CLASSES)
        ) from None

    feature = {
        "image": _bytestring_feature([img_bytes]),  # one image in the list
        "class": _int_feature([class_num]),  # one class in the list
    }
    return tf.train.Example(features=tf.train.Features(feature=feature))
def decode_jpeg_and_label(filepath):
    """Read one JPEG file and extract its label from the file path.

    Args:
        filepath: Scalar string tensor holding the path of a JPEG file. The
            path is expected to contain at least two space-separated tokens,
            the second-to-last being the label (for example
            ``"<dir>/ <label> <number>.jpg"``).

    Returns:
        A tuple ``(image, label)``: the decoded image tensor and a scalar
        string tensor with the label token.
    """
    bits = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(bits)
    # The label is the second-to-last space-separated token of the path.
    # Split the scalar path itself: no glob suffix is appended and no ragged
    # round trip is needed.
    tokens = tf.strings.split(filepath, sep=' ')
    label = tokens[-2]
    return image, label
def resize_and_crop_image(image, label):
    """Resize ``image`` keeping its aspect ratio, then centre-crop it.

    The image is scaled so that one of its first two axes exactly reaches
    its target extent while the other is at least as large as its target,
    and the surplus is cropped evenly from both sides.

    Axis 0 is paired with ``TARGET_SIZE[1]`` and axis 1 with
    ``TARGET_SIZE[0]``; the pairing only matters for a non-square target.

    Args:
        image: Image tensor whose first two axes are the spatial ones.
        label: Passed through unchanged.

    Returns:
        A tuple ``(image, label)`` with the resized and cropped image.
    """
    target_dim1 = TARGET_SIZE[0]
    target_dim0 = TARGET_SIZE[1]

    dim0 = tf.shape(image)[0]
    dim1 = tf.shape(image)[1]

    def _fit_dim0():
        # Scale by target_dim0 / dim0, so axis 0 lands on its target.
        return tf.image.resize(
            image, [dim0 * target_dim0 / dim0, dim1 * target_dim0 / dim0])

    def _fit_dim1():
        # Scale by target_dim1 / dim1, so axis 1 lands on its target.
        return tf.image.resize(
            image, [dim0 * target_dim1 / dim1, dim1 * target_dim1 / dim1])

    # Below 1, axis 0 is the relatively smaller one and drives the scale.
    resize_crit = (dim0 * target_dim1) / (dim1 * target_dim0)
    resized = tf.cond(resize_crit < 1, _fit_dim0, _fit_dim1)

    new_dim0 = tf.shape(resized)[0]
    new_dim1 = tf.shape(resized)[1]
    cropped = tf.image.crop_to_bounding_box(
        resized,
        (new_dim0 - target_dim0) // 2,
        (new_dim1 - target_dim1) // 2,
        target_dim0,
        target_dim1,
    )
    return cropped, label
def recompress_image(image, label):
    """Re-encode ``image`` as JPEG and report its first two dimensions.

    Args:
        image: Image tensor; it is cast to ``tf.uint8`` before encoding.
        label: Passed through unchanged.

    Returns:
        A tuple ``(jpeg, label, height, width)`` where ``jpeg`` is the
        encoded image and ``height`` / ``width`` are ``tf.shape(image)[0]``
        and ``tf.shape(image)[1]`` of the input.
    """
    dims = tf.shape(image)
    pixels = tf.cast(image, tf.uint8)
    jpeg = tf.image.encode_jpeg(
        pixels, optimize_size=True, chroma_downsampling=False)
    return jpeg, label, dims[0], dims[1]
print("Writing TFRecords")
tt0 = time.time()

# Pipeline: list files -> decode + label -> resize/crop -> re-encode as JPEG.
filenames = tf.data.Dataset.list_files(GCS_PATTERN)
dataset1 = filenames.map(decode_jpeg_and_label)
dataset2 = dataset1.map(resize_and_crop_image)
dataset3 = dataset2.map(recompress_image)
# Sharding: there will be one "batch" of images per output file.
dataset3 = dataset3.batch(shard_size)

for shard, (image, label, height, width) in enumerate(dataset3):
    # Convert each batch tensor to NumPy once per shard rather than once per
    # record inside the inner loop.
    images = image.numpy()
    labels = label.numpy()
    # The last shard may be smaller than the others. A separate name is used
    # so the module-level shard_size is not overwritten, which would change
    # the batching if this code were run again.
    records_in_shard = images.shape[0]
    # The shard index and the number of records are part of the file name.
    filename = GCS_OUTPUT + "{:02d}-{}.tfrec".format(shard, records_in_shard)
    with tf.io.TFRecordWriter(filename) as out_file:
        for img_bytes, emotion in zip(images, labels):
            # img_bytes is the re-compressed image: already a byte string.
            example = to_tfrecord(out_file, img_bytes, emotion)
            out_file.write(example.SerializeToString())
    print("Wrote file {} containing {} records".format(filename, records_in_shard))
print("Total time: "+str(time.time()-tt0))
谢谢
解决方案
推荐阅读
- node.js - 违规：“message”处理程序耗时 <N>ms
- javascript - 页面加载前 Cypress 运行语句
- python - 为什么迭代器在
- javascript - 如何从返回的查询信息中提取某些信息?
- python - PyQt5中实现widget的功能,无需修改pyuic5生成的文件
- python - 使用自定义排序进行 Numpy 重塑
- python - 我通过意外运行“pip uninstall pip”卸载了 pip。现在,如何重新安装 pip?
- javascript - 如何使用 Vue 类组件访问 VueJS 3 和 Typescript 中的 HTML 引用?
- java - 无法启动服务 jboss.deployment.unit。“LeqadoviProjekt-ear-1.1.ear”。结构:
- python - 我们如何让程序输出一个int?