首页 > 解决方案 > PCA 无法在 CNN 的整个训练集上运行

问题描述

目标

我有一个 CNN,它接收 36x36 的函数曲线图像,并将其分类为线性、二次或三次。我希望缩短训练时间,并最终能用更高分辨率的图像来训练这个 CNN。[图片:在此处输入图像描述]

CNN中有3个类,每个类有10,000张训练图像,总共有30,000张训练图像。CNN 的验证准确率为 99.58%。

问题

但是,从上面的例子可以看出,训练集中的大部分像素都是无用的白色像素。事实上,平均每张训练图像有 78.9% 的像素是白色的。这意味着大量计算时间被浪费在分析这些无用像素上。

尝试 #1(9 张图片)

这就是为什么我考虑降维技术,特别是 PCA。在我之前的问题中,我询问了如何在 9 个图像的样本上使用 PCA。在@hafiz031 的帮助和我的一些修改之后,我能够实现这个目标。下面你可以看到 PCA 之前和之后的 9 个样本训练图像(使用 7/9 维度;22% 压缩): 在此处输入图像描述

尝试 #2(整个训练集:30,000 张图像)

既然已经看到 PCA 在小规模样本上可行,我想把它扩展到整个 30,000 张图像的训练集上,看看能否减少网络的训练时间(目前在 3 个 epoch、可变学习率 = 0.01 的设置下,训练时间为 135 秒)。

但是,当我尝试在整个训练集上运行 PCA 时,会出现以下错误:

在此处输入图像描述

为什么会这样?为什么我只能处理 32 张图片?如何将 PCA 扩展到整个训练集?我的完整代码在这里,最小可复现示例(MWE)如下。

"""# Import Libraries"""

# Import Libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Dropout
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
import numpy as np
import matplotlib.pyplot as plt

plt.style.use('fivethirtyeight')

"""# Load Dataset"""

import pathlib
# Fetch the training-set archive, unpack it, and keep the resulting location
# as a Path so it can be globbed further down.
dataset_url = "https://barisciencelab.tech/TrainingSet.tar.gz"
downloaded_to = tf.keras.utils.get_file(
    fname="TrainingSet",
    origin=dataset_url,
    untar=True,
)
data_dir = pathlib.Path(downloaded_to)

"""# Display # Images to check"""

# List every PNG found one directory level below the dataset root, then
# report how many there are.
png_paths = list(data_dir.glob('*/*.png'))
print(png_paths)
image_count = len(png_paths)
print(image_count)

"""# Display sample image"""

# Notebook-only shell command, commented out so the file stays valid Python
# (the same treatment this file gives its other IPython magics). The package
# is published on PyPI as `scikit-learn`; `sklearn` is only the import name.
# !pip install scikit-learn

import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.decomposition import PCA

# Collect the path of every PNG one directory level below the dataset root.
graphs = list(data_dir.glob('*/*.png'))
# Open one sample image; the bare expression is what a notebook cell displays.
sample_path = str(graphs[6])
PIL.Image.open(sample_path)

"""# Define Image Dimensions & Batch Size"""

# Images are loaded as squares of this side length (in pixels).
img_height = img_width = 36
# Number of images per dataset batch.
batch_size = 32

"""# Create Training & Validation Sets (80%, 20%)"""

# The two subsets are built with identical options except for `subset`;
# both calls use the same seed and the same 20% validation fraction.
_split_options = dict(
    validation_split=0.2,
    seed=123,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)

train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="training", **_split_options)

val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir, subset="validation", **_split_options)

"""# Define 3 Classes"""

# Human-readable labels, indexed by the integer class id; used for plot
# titles and for the prediction messages.
class_names = [
    'Cubic Sinusoidal',
    'Linear Sinusoidal',
    'Quadratic Sinusoidal',
]
print(class_names)

"""# Supervised Learning (9 Samples from the Training Set)"""

# Notebook-only shell command, commented out so the file stays valid Python.
# The package is published on PyPI as `scikit-image`; `skimage` is only the
# import name.
# !pip install scikit-image

from skimage import data
from skimage.color import rgb2gray

import matplotlib.pyplot as plt

# Draw the first nine images of one training batch in a 3x3 grid and keep
# their uint8 pixel arrays for the PCA experiment that follows.
# `train_ds.take(1)` yields a single batch, so at most `batch_size` images
# are available inside this loop.
plt.rcParams['figure.facecolor'] = 'white'
plt.figure(figsize=(10, 10))
subGraphs = []
for images, labels in train_ds.take(1):
  for i in range(9):
    pixels = images[i].numpy().astype("uint8")
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(pixels)
    subGraphs.append(pixels)
    plt.title(class_names[labels[i]])
    plt.axis("off")

# Stack the collected images into one array and show its shape.
subGraphs = np.array(subGraphs)
print(subGraphs.shape)

# PCA experiment on the sampled images: convert to grayscale, flatten each
# image into a row vector, fit a 3-component PCA, then plot the explained
# variance, the components, and the reconstructed images.

# Grayscale conversion of a single sample; only its shape is printed, the
# value is overwritten by the whole-stack conversion just below.
grayscale = rgb2gray(subGraphs[1])
print(grayscale.shape)

# Convert the whole stack of sampled images at once.
grayscale = rgb2gray(subGraphs)
print(grayscale.shape)

# Flatten every image into one row: (samples, height * width).
grayscale = grayscale.reshape((grayscale.shape[0], grayscale.shape[1] * grayscale.shape[2]))
print(grayscale.shape)

# Feature matrix handed to PCA (one row per image).
X=grayscale 

pca_oliv = PCA(n_components = 3)
X_proj = pca_oliv.fit_transform(X)

# Cumulative share of variance explained by the first k components.
print(np.cumsum(pca_oliv.explained_variance_ratio_))

plt.xlabel('# Dimensions')
plt.ylabel('Explained Variance')
plt.plot(np.cumsum(pca_oliv.explained_variance_ratio_))

plt.figure(figsize=(10, 10))
# NOTE(review): (54, 72) holds 3888 values, i.e. 3 components x 36*36 pixels,
# so this reshape presumably breaks if n_components or the image size changes.
plt.imshow(np.reshape(pca_oliv.components_, (54,72)), cmap=plt.cm.bone, interpolation='nearest')

# Map the projections back to pixel space and show each reconstruction.
X_inv_proj = pca_oliv.inverse_transform(X_proj)
print(X_inv_proj.shape)
for index in range(len(X_inv_proj)): # one iteration per sampled image
  # Hard-coded 36x36 matches img_height / img_width defined earlier.
  X_proj_img = np.reshape(X_inv_proj[index],(36,36))
  plt.imshow(X_proj_img, cmap=plt.cm.bone, interpolation='nearest')
  # `labels` is the variable left over from the sample-plotting loop above.
  plt.title(class_names[labels[index]])
  plt.axis("off")
  plt.show()

# Print the tensor shapes of the first training batch only.
for image_batch, labels_batch in train_ds.take(1):
  print(image_batch.shape)
  print(labels_batch.shape)

"""# Normalize Inputs (/255px)"""

# Demonstrate pixel rescaling on one batch, then set up caching/prefetching
# for the datasets that are actually used for training and validation.
normalization_layer = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)

# Apply the rescaling to the images only; labels pass through unchanged.
# In the code visible here, normalized_ds is only used for the min/max check
# below (the model defined later contains its own Rescaling layer).
normalized_ds = train_ds.map(lambda x, y: (normalization_layer(x), y))
image_batch, labels_batch = next(iter(normalized_ds))
first_image = image_batch[0]
# Notice the pixel values are now in `[0,1]`.
print(np.min(first_image), np.max(first_image))

AUTOTUNE = tf.data.AUTOTUNE

# Rebind both datasets to cached, prefetching versions of themselves.
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

"""# Architechture of CNN (3 Conv2Ds, 3 MaxPool2Ds, 2 Dense and Flatten)"""

# Commented out IPython magic to ensure Python compatibility.
# Load the TensorBoard notebook extension
# %load_ext tensorboard

import tensorflow as tf
import datetime

# Clear any logs from previous runs
# Portable replacement for the notebook shell command `!rm -rf ./logs/`;
# ignore_errors=True mirrors `-f` (no failure when the directory is absent).
import shutil
shutil.rmtree("./logs/", ignore_errors=True)

# Number of output units of the final Dense layer (one per class).
num_classes = 3

# Network: input rescaling, three Conv2D + MaxPooling2D stages, then
# Flatten and two Dense layers.
# Conv2D's first two positional arguments are (filters, kernel_size).
model = tf.keras.Sequential([
  tf.keras.layers.experimental.preprocessing.Rescaling(1./255),
  tf.keras.layers.Conv2D(32, 3, activation='relu'), # 32 filters, 3x3 kernel (the 3 is the kernel size, not a stride)
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Conv2D(32, 3, activation='relu'),
  tf.keras.layers.MaxPooling2D(),
  tf.keras.layers.Flatten(),
  tf.keras.layers.Dense(128, activation='relu'),
  # No activation here: the layer outputs raw logits, matching
  # from_logits=True in the loss below.
  tf.keras.layers.Dense(num_classes)
])

# The optimizer is selected by name, so no learning rate is passed here.
model.compile(
  optimizer='adam', 
  loss=tf.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'])

# One timestamped TensorBoard log directory per run.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Training length and time-based learning-rate decay settings.
epochs = 3
initial_learning_rate = 0.01
# Decay factor derived from the two settings above.
# NOTE(review): in this section initial_learning_rate is only used to compute
# `decay`; confirm whether the optimizer is also meant to start from it.
decay = initial_learning_rate / epochs


def lr_time_based_decay(epoch, lr):
    """Return the learning rate to use for ``epoch``.

    Args:
        epoch: Index of the epoch about to run.
        lr: Learning rate currently in effect.

    Returns:
        ``lr`` divided by ``1 + decay * epoch``.
    """
    shrink = 1 + decay * epoch
    return lr / shrink

# Train with TensorBoard logging and the per-epoch learning-rate schedule.
lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_time_based_decay, verbose=1)
training_callbacks = [tensorboard_callback, lr_scheduler]

history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs,
  callbacks=training_callbacks,
)

# Commented out IPython magic to ensure Python compatibility.
# %tensorboard --logdir logs/fit

model.summary()

# Per-epoch metrics recorded by model.fit.
metrics_log = history.history
acc = metrics_log['accuracy']
val_acc = metrics_log['val_accuracy']
loss = metrics_log['loss']
val_loss = metrics_log['val_loss']

epochs_range = range(epochs)

# Two side-by-side panels: accuracy on the left, loss on the right.
# Each entry: (subplot position, training curve, training label,
#              validation curve, validation label, legend location, title).
curve_panels = (
    (1, acc, 'Training Accuracy', val_acc, 'Validation Accuracy',
     'lower right', 'Train & Val. Accuracy'),
    (2, loss, 'Training Loss', val_loss, 'Validation Loss',
     'upper right', 'Train & Val. Loss'),
)

plt.figure(figsize=(8, 8))
for position, train_curve, train_label, val_curve, val_label, legend_loc, panel_title in curve_panels:
  plt.subplot(1, 2, position)
  plt.plot(epochs_range, train_curve, label=train_label)
  plt.plot(epochs_range, val_curve, label=val_label)
  plt.legend(loc=legend_loc)
  plt.title(panel_title)
plt.show()

"""# Image Input Array"""

# Notebook-only shell command, commented out so the file stays valid Python.
# NOTE(review): nothing visible in this file imports glob2 (the globbing
# above goes through pathlib) - confirm whether this install is needed at all.
# !pip install glob2

# Download each validation graph, classify it with the trained model, and
# collect the predicted class indices in `preds`.
preds = []

for i in range(0,999):

  lin_url = "https://raw.githubusercontent.com/Refath/SinusoidalAnalyzer/main/Validation/Graph_"+str(i)+".png"
  lin_path = tf.keras.utils.get_file('Graph_'+str(i)+'.png', origin=lin_url)

  # Load at the same size the network was trained on.
  img = keras.preprocessing.image.load_img(
      lin_path, target_size=(img_height, img_width)
  )

  # Add a leading axis so the single image forms a batch of one.
  img_array = tf.expand_dims(keras.preprocessing.image.img_to_array(img), 0)

  predictions = model.predict(img_array)
  # The model outputs logits; softmax turns them into class scores.
  score = tf.nn.softmax(predictions[0])
  predicted_class = np.argmax(score)
  confidence = 100 * np.max(score)

  print(
      "This image most likely belongs to {} with a {:.2f} percent confidence."
      .format(class_names[predicted_class], confidence)
  )
  preds.append(predicted_class)

print(preds)
# Translate each predicted class index into a short label; any index other
# than 0 or 1 is reported as "Quadratic".
results = [
    "Cubic" if p == 0 else "Linear" if p == 1 else "Quadratic"
    for p in preds
]

print(results)

len(preds)

"""# Confusion Matrix"""

actual = [1, 1, 1, 0, 2, 2, 1, 0, 2, 0, 1, 1, 1, 0, 1, 2, 1, 1, 0, 0, 1, 0, 1, 2, 1, 0, 2, 1, 0, 2, 2, 2, 0, 0, 2, 0, 0, 2, 1, 0, 1, 1, 0, 0, 2, 1, 0, 2, 0, 2, 2, 1, 1, 2, 2, 2, 0, 1, 2, 1, 2, 2, 1, 2, 1, 2, 0, 2, 0, 1, 1, 0, 2, 2, 0, 0, 0, 0, 1, 1, 2, 0, 2, 0, 2, 0, 1, 2, 0, 0, 1, 0, 1, 0, 1, 1, 0, 2, 0, 1, 1, 2, 0, 0, 0, 1, 2, 1, 1, 1, 2, 2, 2, 2, 0, 2, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 2, 1, 1, 0, 1, 1, 0, 2, 2, 0, 1, 2, 1, 2, 0, 0, 0, 0, 1, 0, 1, 2, 0, 0, 0, 0, 0, 2, 2, 1, 1, 0, 1, 2, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 2, 2, 0, 1, 1, 0, 2, 1, 1, 2, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 2, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 1, 0, 1, 1, 0, 0, 0, 2, 2, 0, 2, 0, 1, 2, 2, 2, 0, 0, 2, 0, 1, 0, 2, 2, 2, 1, 2, 0, 2, 2, 0, 0, 1, 0, 0, 2, 0, 2, 2, 1, 1, 0, 1, 1, 1, 1, 2, 1, 1, 0, 1, 1, 2, 1, 2, 1, 0, 1, 2, 0, 1, 2, 0, 0, 0, 2, 2, 0, 0, 1, 2, 1, 0, 1, 1, 2, 1, 1, 0, 2, 0, 1, 1, 0, 1, 2, 1, 0, 2, 1, 0, 2, 1, 0, 0, 0, 2, 1, 0, 2, 2, 0, 0, 0, 0, 0, 1, 1, 1, 2, 0, 2, 2, 2, 2, 0, 0, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 0, 2, 2, 2, 1, 0, 1, 0, 2, 1, 0, 2, 2, 2, 0, 2, 2, 2, 1, 2, 0, 1, 1, 2, 2, 0, 1, 1, 2, 0, 2, 2, 0, 2, 1, 0, 1, 0, 1, 2, 2, 2, 2, 1, 2, 2, 0, 1, 2, 0, 1, 2, 1, 2, 2, 2, 0, 2, 2, 1, 1, 1, 0, 2, 0, 0, 0, 0, 1, 1, 0, 1, 0, 2, 2, 1, 2, 0, 2, 0, 0, 0, 1, 1, 2, 1, 1, 2, 0, 0, 1, 2, 0, 1, 1, 1, 1, 0, 2, 2, 0, 1, 0, 0, 2, 2, 0, 0, 1, 2, 1, 1, 1, 1, 2, 0, 1, 0, 0, 0, 1, 0, 2, 1, 1, 0, 2, 0, 1, 0, 2, 0, 1, 0, 0, 2, 2, 2, 2, 0, 0, 2, 1, 0, 1, 2, 0, 2, 0, 0, 2, 2, 0, 0, 2, 0, 2, 2, 0, 0, 2, 0, 0, 2, 2, 1, 0, 2, 2, 1, 0, 0, 2, 2, 0, 1, 1, 2, 1, 1, 0, 0, 0, 0, 2, 2, 0, 2, 0, 1, 1, 1, 1, 0, 0, 0, 2, 0, 0, 2, 1, 2, 2, 0, 1, 2, 1, 0, 0, 2, 2, 1, 0, 1, 0, 0, 0, 1, 2, 1, 2, 0, 2, 1, 1, 1, 0, 2, 1, 0, 2, 2, 0, 1, 0, 0, 2, 2, 1, 1, 2, 2, 2, 1, 0, 0, 2, 2, 1, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 2, 2, 0, 1, 1, 0, 0, 0, 2, 2, 0, 2, 0, 2, 2, 1, 1, 2, 0, 2, 0, 0, 1, 0, 2, 2, 2, 0, 2, 1, 0, 1, 1, 0, 0, 2, 0, 1, 2, 0, 2, 1, 0, 1, 2, 1, 1, 1, 2, 1, 1, 2, 2, 1, 0, 0, 1, 1, 1, 2, 1, 2, 2, 
2, 2, 0, 1, 2, 0, 2, 2, 0, 0, 0, 1, 2, 1, 0, 1, 1, 2, 2, 1, 1, 0, 2, 2, 1, 0, 1, 0, 2, 1, 0, 1, 1, 2, 2, 2, 1, 0, 0, 2, 0, 1, 0, 1, 0, 0, 1, 2, 0, 2, 0, 2, 2, 1, 1, 2, 2, 2, 0, 1, 2, 2, 2, 2, 0, 1, 1, 1, 1, 0, 1, 1, 2, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 2, 1, 1, 0, 2, 0, 2, 1, 1, 0, 2, 1, 1, 1, 1, 0, 2, 0, 2, 1, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 2, 1, 0, 2, 1, 2, 2, 1, 1, 0, 2, 0, 1, 0, 0, 2, 1, 1, 0, 0, 2, 2, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 2, 1, 2, 0, 0, 1, 2, 2, 2, 1, 2, 1, 2, 1, 2, 2, 1, 0, 2, 0, 2, 1, 0, 1, 1, 1, 2, 2, 2, 1, 0, 1, 0, 2, 0, 0, 2, 1, 1, 1, 1, 2, 0, 0, 2, 1, 1, 0, 2, 1, 2, 0, 0, 1, 1, 0, 2, 2, 0, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 2, 2, 0, 0, 2, 2, 0, 2, 0, 1, 2, 1, 1, 1, 0, 1, 2, 0, 0, 1, 1, 0, 2, 2, 2, 1, 0, 0, 1, 2, 2, 0, 1, 1, 1, 2, 2, 2, 0, 1, 2, 2, 2, 0, 2, 1, 1, 2, 0, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 2, 1, 0, 0, 1, 0, 1, 2, 2, 1, 0, 1, 0, 1, 0, 0, 2, 2, 1, 0, 1, 2, 1, 0, 0, 1, 2, 0, 0, 1, 1, 1, 0, 1, 1, 2, 2, 0, 1, 2, 1, 1, 0, 0, 1, 2, 1, 0, 2, 2, 1]
# tf.math.confusion_matrix takes (labels, predictions): pass the ground truth
# first so that rows are the actual classes and columns the predicted ones,
# the same orientation as the sklearn confusion_matrix(y_true, y_pred) call
# further down.
tf.math.confusion_matrix(actual, preds)

# Commented out IPython magic to ensure Python compatibility.
from sklearn.metrics import confusion_matrix
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
# %matplotlib inline
import numpy as np

# Build the confusion matrix (rows: actual, columns: predicted) and render
# it as an annotated heatmap.
y_true = actual
y_pred = preds
data = confusion_matrix(y_true, y_pred)

df_cm = pd.DataFrame(
    data,
    index=['Cubic','Linear','Quadratic'],
    columns=['Cubic','Linear','Quadratic'],
)
df_cm.index.name = 'Actual'
df_cm.columns.name = 'Predicted'

plt.figure(figsize = (10,7))
sn.set(font_scale=1.4)
heatmap_options = dict(cmap="Blues", annot=True, annot_kws={"size": 16}, fmt='g')
sn.heatmap(df_cm, **heatmap_options)

标签: python tensorflow machine-learning classification

解决方案


推荐阅读