python - 如何从 Tensorflow 中的图像中检测多个对象?
问题描述
我是 TensorFlow 新手。我已经阅读了 DataCamp 的 TensorFlow 教程(该教程就在这里),并且能够以 65% 的准确率从我的测试数据中识别出 3 类不同的对象。
目前它只能识别仅包含 1 个对象的图像中的对象,而我想从可能包含多个对象的背景图像中检测出多个对象。那么,如何建立一个可以从图像中检测多个对象的模型?
我正在使用 TensorFlow-CPU。
为了便于理解,我将下面的代码包括在内。
import tensorflow as tf
from skimage import transform
from skimage import data
import matplotlib.pyplot as plt
import os
import numpy as np
from skimage.color import rgb2gray
import random
import matplotlib.pyplot as plt
def load_data(data_dir):
    """Load the ``.jpg`` images stored under the label folders of *data_dir*.

    Every immediate sub-directory of ``data_dir`` is treated as one class;
    its name is converted with ``int()`` and used as the label of each image
    found directly inside it.

    Args:
        data_dir: Path of the directory that contains one folder per label.

    Returns:
        A tuple ``(images, labels)`` of two parallel lists: the decoded
        images and the integer label of the folder each image came from.

    Raises:
        ValueError: If a folder that contains ``.jpg`` files has a name that
            is not an integer.
    """
    # Get all subdirectories of data_dir. Each represents a label.
    # os.listdir returns entries in arbitrary order, so sort them to make
    # the order of the returned samples reproducible between runs.
    directories = sorted(
        d for d in os.listdir(data_dir)
        if os.path.isdir(os.path.join(data_dir, d))
    )
    # Loop through the label directories and collect the data in
    # two lists, labels and images.
    labels = []
    images = []
    for d in directories:
        label_dir = os.path.join(data_dir, d)
        file_names = sorted(
            os.path.join(label_dir, f)
            for f in os.listdir(label_dir)
            if f.endswith(".jpg")
        )
        for f in file_names:
            # NOTE(review): newer scikit-image releases appear to drop
            # data.imread in favour of skimage.io.imread - confirm against
            # the installed version.
            images.append(data.imread(f))
            labels.append(int(d))
    return images, labels
# Root folder of the dataset; the train and test splits live in its
# "TrainImages" and "TestImages" sub-folders.
ROOT_PATH = "C:/...../ImageClasses/"
train_data_dir, test_data_dir = (
    os.path.join(ROOT_PATH, split) for split in ("TrainImages", "TestImages")
)

# Read the training split, then build NumPy arrays from the two lists so
# their dimensions and sizes can be inspected.
images, labels = load_data(train_data_dir)
images_array, labels_array = np.array(images), np.array(labels)
# Print the `images` dimensions
print(images_array.ndim)
# Print the number of `images`'s elements
print(images_array.size)
# Print the first instance of `images`. A bare expression is evaluated and
# thrown away when this runs as a script, so it must be printed explicitly.
print(images_array[0])
# Print the `labels` dimensions
print(labels_array.ndim)
# Print the number of `labels`'s elements
print(labels_array.size)
# Count the number of distinct labels
num_classes = len(set(labels_array))
print(num_classes)
# Make a histogram of the `labels` data with one bin per distinct label
# instead of a fixed 62 bins; fall back to a single bin when no labels were
# loaded so the call still receives a valid bin count.
plt.hist(labels, max(num_classes, 1))
# Show the plot
plt.show()
# Indexes of the training images to preview
traffic_signs = [300, 250, 1150, 1230]
# Draw the chosen images next to each other in a one-row, four-column grid
for slot, image_index in enumerate(traffic_signs, start=1):
    plt.subplot(1, 4, slot)
    plt.axis('off')
    plt.imshow(images[image_index])
    plt.subplots_adjust(wspace=0.5)
plt.show()
# Indexes of the training images to inspect
traffic_signs = [300, 225, 365, 400]
# Show each chosen image and report its shape and its smallest and largest
# pixel values
for slot, image_index in enumerate(traffic_signs, start=1):
    sample = images[image_index]
    plt.subplot(1, 4, slot)
    plt.axis('off')
    plt.imshow(sample)
    plt.subplots_adjust(wspace=0.5)
    plt.show()
    print("shape: {0}, min: {1}, max: {2}".format(sample.shape,
                                                  sample.min(),
                                                  sample.max()))
# Resize every training image to 28x28 and stack the results in one array.
# The variable is called `images32` although the target size is 28x28; the
# name is kept because the training step refers to it.
images32 = np.array([transform.resize(image, (28, 28)) for image in images])
# Convert the resized images to grayscale
images32 = rgb2gray(images32)
# Preview the grayscale versions of the previously selected samples
for slot, image_index in enumerate(traffic_signs, start=1):
    plt.subplot(1, 4, slot)
    plt.axis('off')
    plt.imshow(images32[image_index], cmap="gray")
    plt.subplots_adjust(wspace=0.5)
plt.show()
print(images32.shape)
# Placeholders for a batch of 28x28 inputs and their integer class labels
x = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28])
y = tf.placeholder(dtype=tf.int32, shape=[None])
# Flatten each 28x28 input into a vector for the dense layer
images_flat = tf.contrib.layers.flatten(x)
# Output layer with one score per class. No activation is applied: the
# softmax cross-entropy below expects raw (unscaled) logits, and a ReLU here
# would clamp every negative score to zero.
logits = tf.contrib.layers.fully_connected(images_flat, 3, activation_fn=None)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits))
train_op = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
# Predicted class index for every input (despite the name, these are the
# predictions themselves, not a correctness mask)
correct_pred = tf.argmax(logits, 1)
# Fraction of inputs whose predicted class equals the fed label. tf.argmax
# yields int64 by default, so cast to the int32 dtype of `y` before comparing.
accuracy = tf.reduce_mean(
    tf.cast(tf.equal(tf.cast(correct_pred, tf.int32), y), tf.float32))
print("images_flat: ", images_flat)
print("logits: ", logits)
print("loss: ", loss)
print("predicted_labels: ", correct_pred)
# Create the session and initialise all graph variables
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# Run 201 full-batch training steps on the preprocessed training images
for i in range(201):
    print('EPOCH', i)
    # Fetch the loss value together with the training step: printing the
    # `loss` tensor itself only shows the graph node, not a number.
    _, loss_val, accuracy_val = sess.run(
        [train_op, loss, accuracy], feed_dict={x: images32, y: labels})
    if i % 10 == 0:
        print("Loss: ", loss_val)
    print('DONE WITH EPOCH')
# Read the test split
test_images, test_labels = load_data(test_data_dir)
# Resize the test images to 28x28 and stack them in one array
test_images28 = np.array(
    [transform.resize(image, (28, 28)) for image in test_images])
# Convert to grayscale, the same preprocessing as the training images
from skimage.color import rgb2gray
test_images28 = rgb2gray(test_images28)
# Predict a class index for every test image
predicted = sess.run(correct_pred, feed_dict={x: test_images28})
# Count the predictions that agree with the true labels
match_count = sum(
    int(truth == guess) for truth, guess in zip(test_labels, predicted))
# Share of test images classified correctly
accuracy = match_count / len(test_labels)
# Report the test accuracy
print("Accuracy: {:.3f}".format(accuracy))
sess.close()
解决方案
推荐阅读
- python - 从 HTML 中清除非正文文本
- meteor - Mongo在Meteor Server上插入后如何等待观察者调用
- terraform - 将环境变量列表传递给资源
- php - 由于错误无法加载新页面
- ruby-on-rails - 如何在 ruby docker-api 中将图像作为守护进程运行?
- docker - 使用 docker start 命令时强制容器保持活动状态
- google-apps-script - 如何将特定列的数据从一张表复制到另一张表
- bash - 查找:路径必须在表达式之前:2
- mysql - 为什么我的 `delete where id in` 语句会删除表中的所有记录?
- django - 有没有办法从包含相同外键的表中获取数据?