python-3.x - 用于图像训练的深度学习
问题描述
我尝试用图像训练模型,以预测图像中的文本。最初把整张图像的文本一起训练时,遇到了数组形状相关的错误;现在改为针对图像中的每个字母分别训练,但仍然遇到了错误。我还添加了图像生成器文件(generators 模块),用于创建图像并将其传入 fit_generator。
错误:
Using TensorFlow backend.
WARNING: Logging before flag parsing goes to stderr.
W0826 09:18:45.040408 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
W0826 09:18:45.056031 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.
W0826 09:18:45.071652 3428 deprecation_wrapper.py:119] From C:\Users\workspace\test\venv\lib\site-packages\keras\backend\tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.
Traceback (most recent call last):
File "C:/Users/workspace/test/killme.py", line 22, in <module>
o2 = Reshape((len(string.ascii_uppercase), ), name="symbol_{}".format(i+1))(o)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\engine\base_layer.py", line 474, in __call__
output_shape = self.compute_output_shape(input_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 398, in compute_output_shape
input_shape[1:], self.target_shape)
File "C:\Users\workspace\test\venv\lib\site-packages\keras\layers\core.py", line 386, in _fix_unknown_dimension
raise ValueError(msg)
ValueError: total size of new array must be unchanged
代码:
from keras.models import Sequential, Input, Model
from keras.layers import Dense, Reshape
from keras.utils import to_categorical
from keras.layers.convolutional import Conv2D # to add convolutional layers
from keras.layers.convolutional import MaxPooling2D # to add pooling layers
from keras.layers import Flatten # to flatten data for fully connected layers
import string
from generators import ImageGenerator, BasicGenerator
from numpy import reshape
# Build a model with one classification head per character slot of a
# fixed-width, single-channel text image.
height = 20
width = 200
font_size = 20
num_classes = len(string.ascii_uppercase)

i1 = Input(shape=(height, width, 1))
# One output per character slot of `font_size` pixels across the width.
character_count = int(width / font_size)

# Flatten the image before the Dense heads. Dense only transforms the last
# axis, so applying it directly to the 4-D input yields (height, width, 26)
# per sample, which Reshape cannot turn into (26,). That mismatch is what
# raised "ValueError: total size of new array must be unchanged".
flat = Flatten()(i1)

outputs = []
for i in range(character_count):
    # softmax gives a probability distribution over the letters, which is
    # what categorical_crossentropy expects (relu outputs are unnormalised).
    o = Dense(num_classes, activation='softmax')(flat)
    # Shape is already (26,); Reshape is kept so each output keeps its
    # "symbol_N" layer name.
    o2 = Reshape((num_classes,), name="symbol_{}".format(i + 1))(o)
    outputs.append(o2)

string_model = Model(inputs=i1, outputs=outputs)
# NOTE(review): the original line `string_model.layers[2].layer.trainable = False`
# was dropped. `.layer` exists only on wrapper layers (e.g. TimeDistributed),
# so it would raise AttributeError here, and every head needs to be trained.

# NOTE(review): assumes ImageGenerator yields one one-hot target per
# "symbol_N" output -- confirm against generators.py.
generator = ImageGenerator(height, width, font_size, character_count)
string_model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=["categorical_accuracy"])
string_model.summary()
string_model.fit_generator(generator, epochs=10)
解决方案
在把数据送入 Dense 层得到输出之前,必须先对文本数据进行预处理,最好先将文本转换为词汇表(vocab)索引。
可以创建一个如下所示的 CaptionGenerator 类,让这一过程变得简单。
from vgg16 import VGG16
from keras.applications import inception_v3
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, Embedding, TimeDistributed, Dense, RepeatVector, Merge, Activation, Flatten
from keras.preprocessing import image, sequence
from keras.callbacks import ModelCheckpoint
import cPickle as pickle
# Size of the shared embedding space that image features and word features
# are projected into before they are merged.
EMBEDDING_DIM = 128


class CaptionGenerator(object):
    """Prepare caption training data and build the captioning model.

    On construction the pre-encoded image features are unpickled and the
    caption file is scanned to derive the vocabulary, the maximum caption
    length and the number of (partial caption -> next word) samples.

    The code is written to run unchanged on Python 2 and Python 3.
    """

    # Tab-separated file; column 0 is read as the image id, column 1 as the
    # caption. The first row is consumed by pandas as the header.
    DATASET_PATH = 'Flickr8k_text/flickr_8k_train_dataset.txt'
    # Pickled mapping from image id to its encoded feature vector.
    ENCODED_IMAGES_PATH = "encoded_images.p"

    def __init__(self):
        self.max_cap_len = None
        self.vocab_size = None
        self.index_word = None
        self.word_index = None
        self.total_samples = None
        # NOTE: unpickling can execute arbitrary code; only load a trusted file.
        # The context manager closes the handle, which the bare open() leaked.
        with open(self.ENCODED_IMAGES_PATH, "rb") as handle:
            self.encoded_images = pickle.load(handle)
        self.variable_initializer()

    def _load_dataset(self):
        """Return ``(image_ids, captions)`` as two parallel lists."""
        df = pd.read_csv(self.DATASET_PATH, delimiter='\t')
        return df.iloc[:, 0].tolist(), df.iloc[:, 1].tolist()

    def variable_initializer(self):
        """Scan the caption file and fill in the vocabulary statistics.

        Sets ``total_samples``, ``vocab_size``, ``word_index``,
        ``index_word`` and ``max_cap_len``.
        """
        _, caps = self._load_dataset()
        tokenized = [caption.split() for caption in caps]

        # A caption of k words yields k - 1 (prefix -> next word) samples.
        self.total_samples = sum(len(tokens) - 1 for tokens in tokenized)
        print("Total samples : " + str(self.total_samples))

        # Sort the vocabulary so word indices are identical on every run;
        # plain set ordering can differ between interpreter processes, which
        # would silently break weights saved by an earlier run.
        unique = sorted({word for tokens in tokenized for word in tokens})
        self.vocab_size = len(unique)
        self.word_index = {word: i for i, word in enumerate(unique)}
        self.index_word = dict(enumerate(unique))

        self.max_cap_len = max(len(tokens) for tokens in tokenized) if tokenized else 0

        print("Vocabulary size: " + str(self.vocab_size))
        print("Maximum caption length: " + str(self.max_cap_len))
        print("Variables initialization done!")

    def data_generator(self, batch_size=32):
        """Yield training batches forever.

        Args:
            batch_size: number of samples collected before a batch is yielded.

        Yields:
            ``[[images, partial_caps], next_words]`` where ``images`` is an
            array of encoded image features, ``partial_caps`` holds the
            post-padded word-index prefixes (length ``max_cap_len``) and
            ``next_words`` the one-hot encoded word following each prefix.
        """
        partial_caps = []
        next_words = []
        images = []
        print("Generating data...")
        gen_count = 0
        imgs, caps = self._load_dataset()
        # Tokenise and index every caption once instead of re-splitting the
        # text on each inner-loop iteration.
        encoded_caps = [[self.word_index[word] for word in caption.split()]
                        for caption in caps]
        if not any(len(word_ids) > 1 for word_ids in encoded_caps):
            # No caption produces a sample; looping would spin forever
            # without ever yielding.
            return

        total_count = 0
        while True:
            for img_name, word_ids in zip(imgs, encoded_caps):
                current_image = self.encoded_images[img_name]
                for i in range(len(word_ids) - 1):
                    total_count += 1
                    partial_caps.append(word_ids[:i + 1])
                    target = np.zeros(self.vocab_size)
                    target[word_ids[i + 1]] = 1
                    next_words.append(target)
                    images.append(current_image)

                    if total_count >= batch_size:
                        next_words = np.asarray(next_words)
                        images = np.asarray(images)
                        partial_caps = sequence.pad_sequences(
                            partial_caps, maxlen=self.max_cap_len, padding='post')
                        total_count = 0
                        gen_count += 1
                        print("yielding count: " + str(gen_count))
                        yield [[images, partial_caps], next_words]
                        # Start collecting the next batch from scratch.
                        partial_caps = []
                        next_words = []
                        images = []

    def load_image(self, path):
        """Load the image at ``path`` resized to 224x224 as a numpy array."""
        img = image.load_img(path, target_size=(224, 224))
        x = image.img_to_array(img)
        return np.asarray(x)

    def create_model(self, ret_model=False):
        """Build the image + language captioning model.

        Args:
            ret_model: when true, return the model without compiling it.

        Returns:
            The Keras model, compiled with categorical cross-entropy and
            rmsprop unless ``ret_model`` is true.
        """
        # The image branch takes 4096-dimensional pre-encoded features and
        # repeats the projected vector once per caption time step.
        image_model = Sequential()
        image_model.add(Dense(EMBEDDING_DIM, input_dim=4096, activation='relu'))
        image_model.add(RepeatVector(self.max_cap_len))

        lang_model = Sequential()
        lang_model.add(Embedding(self.vocab_size, 256, input_length=self.max_cap_len))
        lang_model.add(LSTM(256, return_sequences=True))
        lang_model.add(TimeDistributed(Dense(EMBEDDING_DIM)))

        # NOTE(review): `Merge` was dropped from later Keras 2.x releases; on
        # those versions this needs the functional API with `concatenate`.
        # Confirm against the installed Keras version.
        model = Sequential()
        model.add(Merge([image_model, lang_model], mode='concat'))
        model.add(LSTM(1000, return_sequences=False))
        model.add(Dense(self.vocab_size))
        model.add(Activation('softmax'))

        print("Model created!")

        if ret_model:
            return model

        model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
        return model

    def get_word(self, index):
        """Return the vocabulary word stored at ``index``."""
        return self.index_word[index]
推荐阅读
- javascript - 删除不在节点列表中工作的类
- java - 反转用户在Java中输入的字符串
- powershell - 通过 powershell 以管理员身份启动 msiexec
- xamarin.forms - 如何将 Rg.Plugins.Popup 用于“SlideDrawer”弹出页面
- azure - 是否可以从 ARM 模板创建 azure 函数并让它从存储在 azure 存储中的 zip 文件中获取代码?
- c# - 带有 Web 浏览器控制问题的 Microsoft Word 2016 VSTO 加载项
- amazon-web-services - 从 lambda 访问 AWS Cloudwatch 事件“附加参数”
- laravel-5 - 发送通知时未使用自定义通知通道
- sql - 带有值oracle sql的where子句中的case语句
- python - 为什么 Python 安装程序不会自动将自身添加到 Windows 上的 PATH 变量中?