Mapping ground truths to TF image files

Problem description

I'm having trouble mapping the ground truths to labels for my image training set. The ground truth is encoded in the file names under my Colab working directory (%cd /content). I'm trying to split on the whitespace and label my tensors accordingly; a small sketch of the split I have in mind is below the listing.

/content/imageset1/Anger 1000.jpg
/content/imageset1/Anger 1011.jpg
/content/imageset1/Anger 1029.jpg
....
/content/imageset1/Disgust 14608.jpg
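
For example, from the first path above I want to end up with 'Anger' as the label. In plain Python the split I have in mind would be something like this (just to illustrate what I'm after, not part of my pipeline):

path = '/content/imageset1/Anger 1000.jpg'
label = path.split('/')[-1].split(' ')[0]  # -> 'Anger'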

I'm struggling with my decode_jpeg_and_label function:

import math
import time
import numpy as np
import tensorflow as tf

GCS_PATTERN = '/content/imageset1/*.jpg'
nb_images = len(tf.io.gfile.glob(GCS_PATTERN)) 
GCS_OUTPUT = BUCKET + '/emotions'  # prefix for output file names
BATCHES = 16 # images are split into batches
shard_size = math.ceil(1.0 * nb_images / BATCHES)
TARGET_SIZE = [100, 100] # this is given in the readme, we will use this later
CLASSES = ['Surprise',
          'Fear',
          'Disgust',
          'Happiness',
          'Sadness',
          'Anger',
          'Neutral']
    # groundtruth labels

# functions for writing TFRecord entries
# Feature values are always stored as lists, a single data element will be a list of size 1
def _bytestring_feature(list_of_bytestrings):
  return tf.train.Feature(bytes_list=tf.train.BytesList(value=list_of_bytestrings))

def _int_feature(list_of_ints): # int64
  return tf.train.Feature(int64_list=tf.train.Int64List(value=list_of_ints))  

def to_tfrecord(tfrec_filewriter, img_bytes, label): #, height, width):  
  class_num = np.argmax(np.array(CLASSES)==label) # index into CLASSES (order defined above), e.g. 0 = Surprise
  one_hot_class = np.eye(len(CLASSES))[class_num] # one-hot version (not written to the record below)
  feature = {
      "image": _bytestring_feature([img_bytes]), # one image in the list
      "class": _int_feature([class_num]) #,        # one class in the list
  }
  return tf.train.Example(features=tf.train.Features(feature=feature))

def decode_jpeg_and_label(filepath):
    bits = tf.io.read_file(filepath)
    image = tf.image.decode_jpeg(bits)
    # parse emotion name from containing directory
    label = tf.strings.split(tf.expand_dims(filepath+'*/*.jpeg', axis=-1), sep=' ')
    label2 = label.values[-2]
    return image, label2

def resize_and_crop_image(image, label):
#Resizing to preserve aspect ratio
  w = tf.shape(image)[0]
  h = tf.shape(image)[1]
  tw = TARGET_SIZE[1]
  th = TARGET_SIZE[0]
  resize_crit = (w * th) / (h * tw)
  image = tf.cond(resize_crit < 1,
                  lambda: tf.image.resize(image, [w*tw/w, h*tw/w]), # if true
                  lambda: tf.image.resize(image, [w*th/h, h*th/h])  # if false
                 )
  nw = tf.shape(image)[0]
  nh = tf.shape(image)[1]
  image = tf.image.crop_to_bounding_box(image, (nw - tw) // 2, (nh - th) // 2, tw, th)
  return image, label

def recompress_image(image, label):
  height = tf.shape(image)[0]
  width = tf.shape(image)[1]
  image = tf.cast(image, tf.uint8)
  image = tf.image.encode_jpeg(image, optimize_size=True, chroma_downsampling=False)
  return image, label, height, width

print("Writing TFRecords")
tt0 = time.time()
filenames = tf.data.Dataset.list_files(GCS_PATTERN) 
dataset1 = filenames.map(decode_jpeg_and_label)
dataset2 = dataset1.map(resize_and_crop_image)  
dataset3 = dataset2.map(recompress_image)
dataset3 = dataset3.batch(shard_size) # sharding: there will be one "batch" of images per file 
for shard, (image, label, height, width) in enumerate(dataset3):
  # batch size used as shard size here
  shard_size = image.numpy().shape[0]
  # number of records in the filename
  filename = GCS_OUTPUT + "{:02d}-{}.tfrec".format(shard, shard_size)
   
  with tf.io.TFRecordWriter(filename) as out_file:
    for i in range(shard_size):
      example = to_tfrecord(out_file,
                            image.numpy()[i], # re-compressed image: already a byte string
                            label.numpy()[i] #, height.numpy()[i], width.numpy()[i]
                            )
      out_file.write(example.SerializeToString())
    print("Wrote file {} containing {} records".format(filename, shard_size))
print("Total time: "+str(time.time()-tt0))

Thanks

Tags: tensorflow, image-processing

Solution
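
Assuming the paths look like the listing above, the label never makes it out of decode_jpeg_and_label: appending '*/*.jpeg' to filepath just concatenates a literal pattern string onto the path, and after splitting on spaces, label.values[-2] is the whole '/content/imageset1/<emotion>' prefix rather than the emotion name alone. One possible fix, sketched under the assumption that each file name has the form '<emotion> <id>.jpg' directly under /content/imageset1/: split the filepath itself, keep the basename, and take the token before the numeric id.

def decode_jpeg_and_label(filepath):
  bits = tf.io.read_file(filepath)
  image = tf.image.decode_jpeg(bits)
  # '/content/imageset1/Anger 1000.jpg' -> basename 'Anger 1000.jpg' -> label 'Anger'
  basename = tf.strings.split(filepath, sep='/')[-1] # drop the directory part
  label = tf.strings.split(basename, sep=' ')[0]     # emotion name precedes the numeric id
  return image, label

One thing to double-check downstream: in the writing loop, label.numpy()[i] is a byte string such as b'Anger', while CLASSES holds str values, so np.array(CLASSES)==label may never match and class_num would silently stay 0 for every image. Either define CLASSES with byte strings (b'Surprise', b'Fear', ...) or decode the label first in to_tfrecord, for example:

  class_num = np.argmax(np.array(CLASSES) == label.decode('utf-8'))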

