python - PCA 无法在 CNN 的整个训练集上运行
问题描述
目标
我有一个 CNN,它接收 36x36 的图形图像并将它们分类为线性、二次或三次。我想减少训练时间并最终使用更高分辨率的图像训练 CNN。
CNN中有3个类,每个类有10,000张训练图像,总共有30,000张训练图像。CNN 的验证准确率为 99.58%。
问题
但是,从上面的例子可以看出,训练集中的大部分像素都是无用的白色像素。事实上,平均每张训练图像中有 78.9% 的像素是白色的。这意味着大量计算时间被浪费在分析这些无用像素上。
尝试 #1(9 张图片)
这就是为什么我考虑降维技术,特别是 PCA。在我之前的问题中,我询问了如何在 9 个图像的样本上使用 PCA。在@hafiz031 的帮助和我的一些修改之后,我能够实现这个目标。下面你可以看到 PCA 之前和之后的 9 个样本训练图像(使用 7/9 维度;22% 压缩):
尝试 #2(整个训练集:30,000 张图像)
既然 PCA 在小规模样本上可行,我想把它扩展到整个 30,000 张图像的训练集上,看看它是否能减少网络的训练时间(目前在 3 个 epoch、可变学习率(初始值 0.01)的设置下,训练时间为 135 秒)。
但是,当我尝试在整个训练集上运行 PCA 时,会出现以下错误:
为什么会这样?为什么由于某种原因我仅限于 32 张图片?如何将我的 PCA 扩展到整个训练集?我的完整代码在这里,MWE 在下面。
"""# Import Libraries"""
# Import Libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
"""# Load Dataset"""
import pathlib

dataset_url = "https://barisciencelab.tech/TrainingSet.tar.gz"
# Fetch the archive through Keras' get_file helper (untar=True extracts it)
# and keep the resulting location as a Path object.
data_dir = pathlib.Path(
    tf.keras.utils.get_file(fname="TrainingSet", origin=dataset_url, untar=True)
)
"""# Display # Images to check"""
# Every PNG exactly one directory level below the dataset root.
png_paths = list(data_dir.glob('*/*.png'))
print(png_paths)
image_count = len(png_paths)
print(image_count)
"""# Display sample image"""
# Commented out IPython magic to ensure Python compatibility.
# The PyPI distribution is named "scikit-learn"; "sklearn" is only the
# import name.
# %pip install scikit-learn
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.decomposition import PCA

# Every PNG exactly one directory level below the dataset root.
graphs = list(data_dir.glob('*/*.png'))
# In a notebook the returned image object is rendered as the cell output;
# in a plain script this expression has no visible effect.
PIL.Image.open(str(graphs[6]))
"""# Define Image Dimensions & Batch Size"""
batch_size, img_height, img_width = 32, 36, 36
"""# Create Training & Validation Sets (80%, 20%)"""
# Options common to both calls; the two calls differ only in `subset`.
_split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_options)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_options)
"""# Define 3 Classes"""
# NOTE(review): presumably this order has to match the label indices
# produced by image_dataset_from_directory -- confirm against
# train_ds.class_names.
class_names = ['Cubic Sinusoidal', 'Linear Sinusoidal', 'Quadratic Sinusoidal']
print(class_names)
"""# Supervised Learning (9 Samples from the Training Set)"""
# Commented out IPython magic to ensure Python compatibility.
# The PyPI distribution is named "scikit-image"; "skimage" is only the
# import name.
# %pip install scikit-image
from skimage import data
from skimage.color import rgb2gray
import matplotlib.pyplot as plt
# Show 9 training images in a 3x3 grid and keep them for the PCA demo.
subGraphs = []
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(10, 10))
# take(1) yields a single batch (batch_size images), so no more than one
# batch worth of samples is ever available inside this loop. Fitting PCA on
# the whole training set would require iterating over every batch instead.
for images, labels in train_ds.take(1):
    for i in range(9):
        ax = plt.subplot(3, 3, i + 1)
        sample = images[i].numpy().astype("uint8")
        plt.imshow(sample)
        subGraphs.append(sample)
        plt.title(class_names[labels[i]])
        plt.axis("off")
subGraphs = np.array(subGraphs)
print(subGraphs.shape)
# Shape check on a single image; the result is overwritten just below.
grayscale = rgb2gray(subGraphs[1])
print(grayscale.shape)
# NEW LINES
# Convert the whole stack at once, then flatten each image into one row so
# that every sample becomes a feature vector.
grayscale = rgb2gray(subGraphs)
print(grayscale.shape)
grayscale = grayscale.reshape((grayscale.shape[0], grayscale.shape[1] * grayscale.shape[2]))
print(grayscale.shape)
# REVISED LINES
X = grayscale
# scikit-learn requires n_components <= min(n_samples, n_features); X is
# built from the 9 sampled images, so 3 components is within that limit.
pca_oliv = PCA(n_components=3)
X_proj = pca_oliv.fit_transform(X)
cumulative_variance = np.cumsum(pca_oliv.explained_variance_ratio_)
print(cumulative_variance)
plt.xlabel('# Dimensions')
plt.ylabel('Explained Variance')
plt.plot(cumulative_variance)
plt.figure(figsize=(10, 10))
# The fixed (54, 72) shape only fits because components_ holds
# 3 * 36 * 36 = 3888 values; it must change if n_components or the image
# size changes.
plt.imshow(np.reshape(pca_oliv.components_, (54, 72)), cmap=plt.cm.bone, interpolation='nearest')
# Map the projected samples back to pixel space to inspect the information
# retained by the kept components.
X_inv_proj = pca_oliv.inverse_transform(X_proj)
print(X_inv_proj.shape)
# NOTE(review): indentation was lost in the source; plt.show() is placed
# inside the loop so that every reconstruction is displayed -- confirm.
for index, flat_image in enumerate(X_inv_proj):  # 9
    X_proj_img = np.reshape(flat_image, (img_height, img_width))
    plt.imshow(X_proj_img, cmap=plt.cm.bone, interpolation='nearest')
    plt.title(class_names[labels[index]])
    plt.axis("off")
    plt.show()
# Print the image/label tensor shapes of the first batch only.
for image_batch, labels_batch in train_ds:
    print(image_batch.shape)
    print(labels_batch.shape)
    break
"""# Normalize Inputs (/255px)"""
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixels values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))
# normalized_ds is only inspected above. The datasets cached below are the
# un-normalized ones, because the model applies its own Rescaling layer.
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
"""# Architechture of CNN (3 Conv2Ds, 3 MaxPool2Ds, 2 Dense and Flatten)"""
# Commented out IPython magic to ensure Python compatibility.
# Load the TensorBoard notebook extension
# %load_ext tensorboard
import tensorflow as tf
import datetime
import shutil

# Clear any logs from previous runs. This is the portable replacement for
# the notebook shell command `rm -rf ./logs/`; a missing directory is not an
# error.
shutil.rmtree("./logs/", ignore_errors=True)
num_classes = 3
# Three Conv2D + MaxPooling2D stages followed by a small dense head.
# The final Dense layer has no activation, so the model emits raw logits,
# matching from_logits=True in the loss below.
model = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.Rescaling(1./255),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),  # 32 filters, 3x3 kernel
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(32, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(num_classes)
])
model.compile(
    optimizer='adam',
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'])
# One timestamped log directory per run so TensorBoard can tell runs apart.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
epochs = 3
initial_learning_rate = 0.01
# Decay constant of the time-based schedule below.
decay = initial_learning_rate / epochs


def lr_time_based_decay(epoch: int, lr: float) -> float:
    """Return the learning rate to use for ``epoch``.

    Implements time-based decay: the incoming rate is divided by
    ``1 + decay * epoch``, where ``decay`` is the module-level constant
    ``initial_learning_rate / epochs``.

    Args:
        epoch: Zero-based epoch index.
        lr: Learning rate handed in by the caller.

    Returns:
        The decayed learning rate; equal to ``lr`` when ``epoch`` is 0.
    """
    return lr / (1 + decay * epoch)
# Train with TensorBoard logging and the per-epoch learning-rate schedule.
lr_callback = tf.keras.callbacks.LearningRateScheduler(lr_time_based_decay, verbose=1)
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=epochs,
    callbacks=[tensorboard_callback, lr_callback],
)
# Commented out IPython magic to ensure Python compatibility.
# %tensorboard --logdir logs/fit
model.summary()
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']
epochs_range = range(epochs)
# Two side-by-side panels: accuracy on the left, loss on the right.
plt.figure(figsize=(8, 8))
panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Train & Val. Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Train & Val. Loss'),
)
for position, train_curve, train_label, val_curve, val_label, legend_loc, panel_title in panels:
    plt.subplot(1, 2, position)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(panel_title)
plt.show()
"""# Image Input Array"""
# Commented out IPython magic to ensure Python compatibility.
# %pip install glob2
# Download each validation graph, classify it, and record the predicted
# class index.
preds = []
# NOTE(review): range(0, 999) requests Graph_0.png .. Graph_998.png, i.e.
# 999 files -- confirm that this matches the length of the ground-truth
# list used for the confusion matrix.
for i in range(0, 999):
    lin_url = "https://raw.githubusercontent.com/Refath/SinusoidalAnalyzer/main/Validation/Graph_" + str(i) + ".png"
    lin_path = tf.keras.utils.get_file('Graph_' + str(i) + '.png', origin=lin_url)
    img = keras.preprocessing.image.load_img(
        lin_path, target_size=(img_height, img_width)
    )
    img_array = keras.preprocessing.image.img_to_array(img)
    img_array = tf.expand_dims(img_array, 0)  # Create a batch
    predictions = model.predict(img_array)
    # The model outputs logits; softmax turns them into class probabilities.
    score = tf.nn.softmax(predictions[0])
    best_class = np.argmax(score)
    print(
        "This image most likely belongs to {} with a {:.2f} percent confidence."
        .format(class_names[best_class], 100 * np.max(score))
    )
    preds.append(best_class)
print(preds)
# Translate each predicted class index into a short class name:
# 0 -> "Cubic", 1 -> "Linear", anything else -> "Quadratic".
results = [
    "Cubic" if pred == 0 else "Linear" if pred == 1 else "Quadratic"
    for pred in preds
]
print(results)
len(preds)
"""# Confusion Matrix"""
actual = [1, 1, 1, 0, 2, 2, 1, 0, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1, 0, 0, 1, 0, 1, 2, 1, 0, 2, 1, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 1, 0, 1, 1, 0, 0, 2, 1, 0, 2, 0, 2, 2, 1, 1, 2, 2, 2, 0, 1, 2, 1, 2, 2, 1, 2, 1, 2, 0, 2, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0, 1, 1, 2, 0, 2, 0, 2, 0, 1, 2, 0, 0, 1, 0, 1, 0, 1, 1, 0, 2, 0, 1, 1, 2, 0, 0, 0, 1, 2, 1, 1, 1, 2, 2, 2, 2, 0, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 1, 0, 1, 1, 0, 2, 2, 0, 1, 2, 1, 2, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 0, 2, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 0, 2, 2, 0, 2, 0, 1, 2, 2, 2, 0, 0, 2, 0, 1, 0, 2, 2, 2, 1, 2, 0, 2, 2, 0, 0, 1, 0, 0, 2, 0, 2, 2, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 0, 1, 2, 0, 1, 2, 0, 0, 0, 2, 2, 0, 0, 1, 2, 1, 0, 1, 1, 2, 1, 1, 0, 2, 0, 1, 1, 0, 1, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 0, 2, 1, 0, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 0, 2, 2, 2, 1, 0, 1, 0, 2, 1, 0, 2, 2, 2, 0, 2, 2, 2, 1, 2, 0, 1, 1, 2, 2, 0, 1, 1, 2, 0, 2, 2, 0, 2, 1, 0, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, 0, 1, 2, 0, 1, 2, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 0, 1, 0, 2, 2, 1, 2, 0, 2, 0, 0, 0, 1, 1, 2, 1, 1, 2, 0, 0, 1, 2, 0, 1, 1, 1, 1, 0, 2, 2, 0, 1, 0, 0, 2, 2, 0, 0, 1, 2, 1, 1, 1, 1, 2, 0, 1, 0, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 0, 2, 2, 2, 2, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0, 0, 2, 2, 0, 1, 1, 2, 1, 1, 0, 0, 0, 0, 2, 2, 0, 2, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 2, 1, 2, 2, 0, 1, 2, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 1, 2, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 0, 2, 2, 0, 1, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1, 0, 0, 2, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 0, 1, 1, 0, 0, 0, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 0, 2, 0, 0, 1, 0, 2, 2, 2, 0, 2, 1, 0, 1, 1, 0, 0, 2, 0, 1, 2, 0, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 0, 0, 1, 1, 1, 2, 1, 2, 2, 
2, 2, 0, 1, 2, 0, 2, 2, 0, 0, 0, 1, 2, 1, 0, 1, 1, 2, 2, 1, 1, 0, 2, 2, 1, 0, 1, 0, 2, 1, 0, 1, 1, 2, 2, 2, 1, 0, 0, 2, 0, 1, 0, 1, 0, 0, 1, 2, 0, 2, 0, 2, 2, 1, 1, 2, 2, 2, 0, 1, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 2, 1, 1, 0, 2, 0, 2, 1, 1, 0, 2, 1, 1, 1, 1, 0, 2, 0, 2, 1, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 1, 0, 2, 1, 2, 2, 1, 1, 0, 2, 0, 1, 0, 0, 2, 1, 1, 0, 0, 2, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 2, 0, 0, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 0, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 2, 1, 0, 1, 0, 2, 0, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 2, 0, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 1, 2, 1, 1, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 2, 2, 1, 0, 0, 1, 2, 2, 0, 1, 1, 1, 2, 2, 2, 0, 1, 2, 2, 2, 0, 2, 1, 1, 2, 0, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, 1, 0, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0, 2, 2, 1, 0, 1, 2, 1, 0, 0, 1, 2, 0, 0, 1, 1, 1, 0, 1, 1, 2, 2, 0, 1, 2, 1, 1, 0, 0, 1, 2, 1, 0, 2, 2, 1]
# tf.math.confusion_matrix expects the true labels first and the predictions
# second; keywords are used so the orientation agrees with the scikit-learn
# matrix built below (rows = actual, columns = predicted).
tf.math.confusion_matrix(labels=actual, predictions=preds)
# Commented out IPython magic to ensure Python compatibility.
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np
y_true = actual
y_pred = preds
# Pin the label set so the matrix is always 3x3, even if a class never
# occurs; otherwise the DataFrame construction below would fail.
# Note: `data` rebinds the name imported earlier via `from skimage import data`.
data = confusion_matrix(y_true, y_pred, labels=[0, 1, 2])
class_labels = ['Cubic', 'Linear', 'Quadratic']
df_cm = pd.DataFrame(data, columns=class_labels, index=class_labels)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'
plt.figure(figsize=(10, 7))
sn.set(font_scale=1.4)
sn.heatmap(df_cm, cmap="Blues", annot=True, annot_kws={"size": 16}, fmt='g')
解决方案
推荐阅读
- multithreading - 机器上的线程数和 ThreadPool 线程数
- firebase - 在 Flutter 中使用 FirebaseAuth 检查用户的身份验证状态
- javascript - Node js 中的 response.on() 方法有什么作用
- java - 如何不允许用户突出显示 JTextArea 中的文本?但是我只能突出显示文本吗?
- android - 使用 Jetpack Benchmark 执行测试时的 INSTALL_FAILED_DUPLICATE_PERMISSION
- python - django.db.utils.OperationalError:没有这样的表:polls_post
- jakarta-ee - 如何测试使用@Resource 注入的数据源
- php - 无需编码即可添加 html
- jpeg - 我有两个被重命名为 jpg 的 gif。一个有效,一个无效。谁能告诉我为什么?
- php - ZendFramework 中的 Zend Registry