首页 > 解决方案 > OP_REQUIRES 在 decode_wav_op.cc:55 失败:参数无效:只能读取 16 位 WAV 文件,但收到 32

问题描述

我正在构建一个音频分类器,打算训练一个神经网络(NN)将音频分为两个类别。以下是我的“train.py”模块:

import os 
import tensorflow as tf
import numpy as np

from configs import PARENT_DIR_TO_SAVE_CHUNKS, VALIDATION_PERCENT_OUT_OF_TOTAL, TEST_PERCENT_OUT_OF_TOTAL, PARENT_DIR_TO_PCM_GOOD_CHUNKS, PARENT_DIR_TO_PCM_BAD_CHUNKS
from utils import decode_audio, do_process_path_of_audio, get_label, get_waveform_and_label, plot, get_spectrogram

def main():
    """Split the GOOD/BAD audio chunks into train/val/test sets and decode one sample.

    Lists every file under ``PARENT_DIR_TO_SAVE_CHUNKS/GOOD`` and
    ``PARENT_DIR_TO_SAVE_CHUNKS/BAD``, shuffles the paths, partitions them
    according to ``VALIDATION_PERCENT_OUT_OF_TOTAL`` and
    ``TEST_PERCENT_OUT_OF_TOTAL``, then builds the waveform dataset for the
    training files and computes the spectrogram of its first element.
    """
    # GOOD files are listed first, then BAD; the order is irrelevant after
    # the shuffle below but is kept the same as before.
    all_files = []
    for class_name in ("GOOD", "BAD"):
        class_dir = os.path.join(PARENT_DIR_TO_SAVE_CHUNKS, class_name)
        all_files.extend(
            os.path.join(class_dir, name) for name in os.listdir(class_dir)
        )

    filenames = tf.convert_to_tensor(all_files)
    filenames = tf.random.shuffle(filenames)

    print(filenames)

    total = len(filenames)
    num_val = int((VALIDATION_PERCENT_OUT_OF_TOTAL / 100) * total)
    num_test = int((TEST_PERCENT_OUT_OF_TOTAL / 100) * total)
    num_train = total - num_val - num_test

    # Three contiguous, non-overlapping slices. Using forward offsets (instead
    # of a negative index for the test set) keeps the validation slice exactly
    # num_val long and yields an empty test set when num_test is 0, rather
    # than the whole dataset.
    train_files = filenames[:num_train]
    val_files = filenames[num_train:num_train + num_val]
    test_files = filenames[num_train + num_val:]

    print('Training set size:', len(train_files))
    print('Validation set size:', len(val_files))
    print('Test set size:', len(test_files))

    (files, waveforms) = do_process_path_of_audio(train_files)
    print(waveforms)
    # plot(waveforms)
    # print(waveforms.take(1))

    # Pull a single element to exercise decoding and the spectrogram step;
    # the results are not used further yet.
    for waveform, label in waveforms.take(1):
        label = label.numpy().decode('utf-8')
        spectrogram = get_spectrogram(waveform)

    # print('Label:', label)
    # print('Waveform shape:', waveform.shape)
    # print('Spectrogram shape:', spectrogram.shape)


if __name__ == "__main__":
    main()

以下包含我的“preprocess.py”模块中的相关函数:

import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import os

def decode_audio(audio_binary):
  """Decode the raw bytes of a WAV file and drop the trailing channel axis.

  NOTE: the failure reported for this pipeline ("can only read 16-bit WAV
  files, but received 32") is raised by the ``tf.audio.decode_wav`` call
  below, so the input files have to be 16-bit WAV for this to succeed.

  Args:
    audio_binary: contents of a WAV file, as passed to
      ``tf.audio.decode_wav``.

  Returns:
    The decoded audio with its last axis squeezed away.
  """
  samples, sample_rate = tf.audio.decode_wav(audio_binary)
  # Debug output: sample rate first, then the decoded samples.
  print(sample_rate)
  print(samples)
  mono = tf.squeeze(samples, axis=-1)
  return mono


def get_label(file_path):
    """Return the name of the directory that directly contains ``file_path``.

    The path is split on ``os.path.sep`` and the second-to-last component
    is returned.
    """
    # Debug output of the path being processed.
    print(file_path)
    path_components = tf.strings.split(input=file_path, sep=os.path.sep)
    return path_components[-2]


def get_waveform_and_label(file_path):
  """Load one audio file and pair its waveform with its label.

  Args:
    file_path: path of the audio file to read.

  Returns:
    A ``(waveform, label)`` tuple, where the label comes from ``get_label``
    and the waveform from ``decode_audio``.
  """
  # The label is resolved before the file is read, so debug prints from the
  # two helpers appear in the same order as before.
  label = get_label(file_path)
  waveform = decode_audio(tf.io.read_file(file_path))
  return waveform, label

def do_process_path_of_audio(files):
    """Build the file-path dataset and the derived (waveform, label) dataset.

    Args:
        files: the audio file paths to slice into a dataset.

    Returns:
        A ``(files_ds, waveform_ds)`` tuple: the dataset of paths, and that
        dataset mapped through ``get_waveform_and_label``.
    """
    path_dataset = tf.data.Dataset.from_tensor_slices(files)
    labeled_waveforms = path_dataset.map(
        map_func=get_waveform_and_label,
        num_parallel_calls=tf.data.AUTOTUNE,
    )
    return (path_dataset, labeled_waveforms)


def plot(waveforms):
    """Plot up to nine waveforms from ``waveforms`` in a 3x3 grid.

    Each subplot is titled with the element's label, decoded as UTF-8.

    Args:
        waveforms: dataset yielding ``(audio, label)`` pairs.
    """
    n_rows, n_cols = 3, 3
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 12))
    print("======>", waveforms.take(15))

    y_ticks = np.arange(-1.2, 1.2, 0.2)
    samples = waveforms.take(n_rows * n_cols)
    # axes.flat walks the grid row by row, matching the i // cols, i % cols
    # placement of each sample.
    for ax, (audio, label) in zip(axes.flat, samples):
        ax.plot(audio.numpy())
        ax.set_yticks(y_ticks)
        ax.set_title(label.numpy().decode('utf-8'))

    plt.show()


def get_spectrogram(waveform, *, input_len=16000):
  """Return the magnitude spectrogram of a waveform fixed to ``input_len`` samples.

  The waveform is cut to at most ``input_len`` samples and zero-padded up to
  that length so every input yields a spectrogram of the same shape.

  Args:
    waveform: the audio samples; assumed to be 1-D (e.g. the output of
      ``decode_audio``) — TODO confirm for other callers.
    input_len: number of samples to truncate/pad to. Keyword-only; defaults
      to 16000, the length that was previously hard-coded.

  Returns:
    The absolute value of the STFT of the fixed-length waveform
    (``frame_length=255``, ``frame_step=128``).
  """
  # Truncate first: without this, a waveform longer than input_len makes the
  # padding shape below negative and tf.zeros fails.
  waveform = waveform[:input_len]

  zero_padding = tf.zeros([input_len] - tf.shape(waveform), dtype=tf.float32)

  # Cast so the waveform has the same dtype as the padding before concat.
  waveform = tf.cast(waveform, tf.float32)
  equal_length = tf.concat([waveform, zero_padding], 0)
  spectrogram = tf.signal.stft(
      equal_length, frame_length=255, frame_step=128)

  # Keep only the magnitude of the complex STFT output.
  spectrogram = tf.abs(spectrogram)

  return spectrogram

现在,当程序执行到下面这一行时:

for waveform, label in waveforms.take(1)

我收到以下错误: OP_REQUIRES 在 decode_wav_op.cc:55 失败:参数无效:只能读取 16 位 WAV 文件,但收到 32

我检查了数据集中音频的 sample_rate,它们都是 16000。我在互联网上搜索过,但没有发现有人问过类似的问题。有什么办法可以解决这个问题吗?

标签: tensorflow, audio

解决方案


推荐阅读