Tensorflow - ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list)

Problem Description

I am using an LSTM for multi-class text classification. I have tried many solutions for the problem mentioned in the title, but I cannot get it to work. The code:

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# The question does not show where STOPWORDS comes from; NLTK's English
# stop words are a common choice and are assumed here.
from nltk.corpus import stopwords
STOPWORDS = set(stopwords.words('english'))

vocab_size = 5000
embedding_dim = 200
max_length = 200  # try 150 next time
trunc_type = 'post'
padding_type = 'post'
oov_tok = '<OOV>'


my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\train.txt", "r", encoding="utf8")
content = my_file.read()
articles = content.split("\n") #articles is a list of training articles
my_file.close()

my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\train.labels", "r", encoding="utf8")
content = my_file.read()
labels = content.split("\n") #labels is a list of training labels
my_file.close()


train_articles = []
for article in articles:
    for word in STOPWORDS:
        token = ' ' + word + ' '
        article = article.replace(token, ' ')
        article = article.replace('  ', ' ')  # collapse double spaces left by the removal
    train_articles.append(article)
train_labels = labels


#PREP VALIDATION SET
my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\val.txt", "r", encoding="utf8")
content = my_file.read()
val_articles = content.split("\n")
my_file.close()
validation_articles = []
for article in val_articles:
    for word in STOPWORDS:
        token = ' ' + word + ' '
        article = article.replace(token, ' ')
        article = article.replace('  ', ' ')  # collapse double spaces left by the removal
    validation_articles.append(article)
my_file = open("C:\\Users\\Ashish\\Desktop\\pr2_data\\val.labels", "r", encoding="utf8")
content = my_file.read()
validation_labels = content.split("\n")
my_file.close()
labels = train_labels + validation_labels
articles = train_articles + validation_articles
#### error free till here



#####train_sequences
tokenizer = Tokenizer(num_words = vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_articles)
word_index = tokenizer.word_index

train_sequences = tokenizer.texts_to_sequences(train_articles)
train_padded = pad_sequences(train_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)
validation_sequences = tokenizer.texts_to_sequences(validation_articles)
validation_padded = pad_sequences(validation_sequences, maxlen=max_length, padding=padding_type, truncating=trunc_type)


label_tokenizer = Tokenizer()
label_tokenizer.fit_on_texts(labels)
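# texts_to_sequences returns a list of lists; np.array only produces a clean
# 2-D integer array if every label yields exactly one token. Any empty label
# line yields an empty inner list, and the result becomes a ragged object array.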
training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels))
validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels))


model = tf.keras.Sequential([
    # Embedding layer expecting an input vocab of size 5000, producing
    # embeddings of dimension embedding_dim (200) as set at the top
    tf.keras.layers.Embedding(vocab_size, embedding_dim),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(embedding_dim)),
#    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(32)),
    # use ReLU in place of tanh, since the two are good alternatives for each other
    tf.keras.layers.Dense(embedding_dim, activation='relu'),
    # Dense output layer with 12 units and softmax activation; with multiple
    # classes, softmax converts the outputs into a probability distribution
    tf.keras.layers.Dense(12, activation='softmax')
])
model.summary()
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
num_epochs = 10
history = model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2)

The error is as follows:

File "aa.py", line 97, in <module>
    history = model.fit(train_padded, training_label_seq, epochs=num_epochs, validation_data=(validation_padded, validation_label_seq), verbose=2)
File "C:\Users\Ashish\AppData\Local\Programs\Python\Python38\lib\site-packages\tensorflow\python\framework\constant_op.py", line 98, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)
ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).
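
For context, this particular ValueError is what TensorFlow raises when it receives an object-dtype NumPy array, i.e. an array of unequal-length Python lists rather than a rectangular numeric array. A minimal sketch (with made-up values) that reproduces the same error:

import numpy as np
import tensorflow as tf

# Hypothetical label sequences: one empty inner list makes the array ragged
ragged = np.array([[3], [7], []], dtype=object)
print(ragged.dtype)   # object, not int
tf.constant(ragged)   # ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type list).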

Tags: python, numpy, tensorflow, keras, lstm

Solution
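
A likely cause, given the code above: content.split("\n") leaves a trailing empty string when a file ends with a newline. label_tokenizer.texts_to_sequences maps that empty string to an empty list, so np.array receives a mix of one-element and zero-element lists and builds a ragged object-dtype array, which model.fit cannot convert to a tensor. A sketch of a fix under that assumption, filtering the empty pairs out before tokenizing:

# Drop pairs whose label line is empty (e.g. the trailing "" from split("\n")),
# filtering articles and labels together so they stay aligned.
train_pairs = [(a, l) for a, l in zip(train_articles, train_labels) if l.strip()]
train_articles = [a for a, _ in train_pairs]
train_labels = [l for _, l in train_pairs]

val_pairs = [(a, l) for a, l in zip(validation_articles, validation_labels) if l.strip()]
validation_articles = [a for a, _ in val_pairs]
validation_labels = [l for _, l in val_pairs]

# Rebuild the padded inputs and label arrays as in the question. Every label
# now maps to exactly one token, so each label array is a clean
# (num_samples, 1) integer array instead of dtype object.
training_label_seq = np.array(label_tokenizer.texts_to_sequences(train_labels))
validation_label_seq = np.array(label_tokenizer.texts_to_sequences(validation_labels))
assert training_label_seq.dtype != object

One further point to watch: Tokenizer indices start at 1, so with 12 classes the label ids run 1..12, while sparse_categorical_crossentropy against a 12-unit softmax expects 0..11. Subtracting 1 from the label arrays (or widening the output layer to 13 units) avoids an out-of-range error once the ValueError is fixed.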
