首页 > 解决方案 > 如何解释 YOLOv4 输出

问题描述

我试图在 iOS 上使用 TensorFlow Lite 解释 YOLOv4 的输出。我已经阅读了一些关于输出张量中使用的 cells 和 anchors 概念的资料,我想为它实现解析器。我的模型的输出由 2 个张量组成:

float32[1,13,13,255]
float32[1,26,26,255]

我想知道的第一件事是第二个输出中的内容是什么?从我读过的第一个应该包含所有信息。第二个只是更详细的结果还是其他?

好的,但让我们从第一个输出开始。我编写了一个简单的函数,它应该提取所有单元格,然后从这些单元格中提取所有锚点数据。这是它的外观:

        // Fetch output #0 and reinterpret its raw bytes as Float values;
        // bail out with nil when the byte buffer cannot be converted.
        let output0 = try localModel.output(at: 0)
        guard let output0Floats = [Float](bytes: output0.data) else { return nil }

        let numberOfCells = 13
        let numberOfAnchors = 3
        let numberOfClasses = 80
        // Each anchor carries 5 leading values followed by one value per class.
        let anchorSize = (numberOfClasses + 5)

        /// Copies out the run of floats that belongs to the grid cell at (x, y).
        /// Cells are addressed row by row: linear index = y * numberOfCells + x.
        func cellAt(x: Int, y: Int) -> [Float] {
            let floatsPerCell = anchorSize * numberOfAnchors
            let lowerBound = (y * numberOfCells + x) * floatsPerCell
            let upperBound = lowerBound + floatsPerCell
            return Array(output0Floats[lowerBound..<upperBound])
        }

        /// Splits one cell's floats into `numberOfAnchors` consecutive chunks
        /// of `anchorSize` values each.
        func anchors(in cell: [Float]) -> [[Float]] {
            var chunks: [[Float]] = []
            for anchorIndex in 0..<numberOfAnchors {
                let start = anchorIndex * anchorSize
                chunks.append(Array(cell[start..<start + anchorSize]))
            }
            return chunks
        }

        // Dump every cell of the grid, row by row.
        for y in 0..<numberOfCells {
            for x in 0..<numberOfCells {
                let cell = cellAt(x: x, y: y)
                print("Cell: \(x),\(y) contains anchors:")
                print(anchors(in: cell))
            }
        }

...

private extension Array {

    /// Creates an array by reinterpreting the raw contents of `bytes` as
    /// consecutive `Element` values.
    ///
    /// The bytes are copied into the array's own (properly aligned) storage,
    /// so `bytes` may start at any address — including a `Data` slice that
    /// begins at an offset which is not a multiple of `Element`'s alignment.
    /// Binding the source memory in place would require that alignment.
    ///
    /// - Note: Only meaningful for trivial element types (e.g. `Float`,
    ///   `Int32`); the bytes are copied verbatim without any initialization
    ///   logic.
    /// - Parameter bytes: The raw bytes to reinterpret.
    /// - Returns: `nil` when `bytes.count` is not a whole multiple of
    ///   `MemoryLayout<Element>.stride`.
    init?(bytes: Data) {
        let stride = MemoryLayout<Element>.stride
        guard bytes.count % stride == 0 else { return nil }

        let elementCount = bytes.count / stride
        self = [Element](unsafeUninitializedCapacity: elementCount) { storage, initializedCount in
            bytes.withUnsafeBytes { source in
                // Byte-wise copy: no alignment requirement on the source.
                UnsafeMutableRawBufferPointer(storage).copyMemory(from: source)
            }
            initializedCount = elementCount
        }
    }

}

这是我为单个单元格获得的示例结果:

Cell: 7,12 contains anchors:
[[0.693655, -1.1966848, -0.007975042, -0.3327814, -9.583811, 0.3976263, -6.0192285, -6.329881, -5.8644676, -10.2914715, -9.632221, -8.071436, -6.399925, -5.240812, -8.791572, -5.6437893, -9.8603115, -10.492198, -1.9372412, -7.0640965, -2.6936512, -5.112247, -7.131972, -7.1825066, -7.4413238, -10.401382, -7.5643044, -8.608834, -8.239082, -6.799241, -8.035741, -5.7502255, -8.881622, -7.3571744, -9.315964, -7.925786, -7.7857537, -4.8930154, -8.529579, -7.633353, -8.817726, -7.47082, -8.291334, -4.683982, -4.170734, -6.193165, -7.8437185, -9.854808, -9.490823, -8.272433, -8.434413, -7.765057, -7.149798, -11.194118, -6.5116143, -11.112444, -9.999684, -10.689343, -9.942104, -9.520727, -7.440444, -2.531265, -3.7234814, -7.5839844, -4.550161, -3.031804, -4.616852, -8.832014, -6.0279136, -9.482858, -6.750441, -8.450063, -10.222086, -7.6301804, -7.559189, -10.234117, -6.999834, -7.1350074, -5.308107, -6.2450233, -8.8833885, -9.381562, -3.8812854, -8.868278, -9.988986], [0.4351927, -1.3958519, 0.46428338, -0.39240548, -8.170114, 0.7084342, -7.709829, -5.9856057, -6.808081, -10.644019, -9.912677, -7.3293757, -7.548369, -5.533275, -10.072926, -7.316476, -9.945337, -11.118561, -3.2463353, -10.561513, -5.067392, -7.312641, -8.729989, -9.5539055, -7.58917, -9.886164, -6.5404315, -8.553915, -9.023286, -9.580754, -6.7592535, -8.380334, -8.182065, -7.2239976, -9.276712, -7.5086412, -7.2454534, -7.139829, -8.614485, -7.8158274, -9.850543, -9.123642, -6.8081083, -6.936388, -7.997142, -8.845028, -11.322939, -10.713314, -9.629859, -10.820017, -10.480835, -9.071951, -7.9244685, -12.562474, -7.1654305, -13.456438, -10.116255, -12.255847, -11.530319, -10.3949375, -10.665162, -5.6975913, -4.050809, -10.665826, -2.638548, -3.5531735, -7.0320325, -10.047072, -7.678191, -10.290669, -7.438999, -7.531754, -9.817409, -8.428637, -9.502961, -10.955662, -8.6340065, -5.0168147, -8.593948, -9.412493, -10.816083, -10.903126, -8.81499, -10.449745, -9.069517], [0.025469145, -1.7808459, -0.18256505, 
-0.70104045, -10.450736, -0.67288893, -5.771856, -5.448979, -6.4159226, -8.777289, -7.960696, -5.3555217, -4.798117, -2.8378687, -7.9489646, -8.255625, -8.968552, -8.036578, -2.46956, -8.458385, -4.8979797, -6.5746903, -7.2408285, -8.574903, -6.8356185, -6.4320874, -6.037178, -7.56021, -7.275848, -8.808907, -3.9019513, -8.835796, -6.360187, -6.5461373, -7.1117754, -6.6027184, -7.280362, -7.1671834, -7.292713, -7.1488175, -7.1398635, -8.180893, -5.797153, -6.3417816, -6.9332256, -8.371075, -9.2042055, -8.602686, -8.072069, -8.1690035, -8.0164175, -6.61691, -6.3536263, -9.318304, -4.5542707, -10.049933, -7.8087454, -9.497473, -9.07455, -8.406244, -7.078502, -5.5775504, -2.3586287, -8.409487, -1.6716739, -3.8225765, -6.9020715, -6.6682305, -5.784493, -8.40492, -7.2747784, -6.392035, -6.4958863, -7.629692, -7.4995623, -8.4432125, -6.7565637, -3.113231, -7.3596015, -8.573539, -8.829562, -8.523581, -8.571439, -8.087017, -7.958835]]

所以单个锚看起来像这样:

[0.693655, -1.1966848, -0.007975042, -0.3327814, -9.583811, 0.3976263, -6.0192285, -6.329881, -5.8644676, -10.2914715, -9.632221, -8.071436, -6.399925, -5.240812, -8.791572, -5.6437893, -9.8603115, -10.492198, -1.9372412, -7.0640965, -2.6936512, -5.112247, -7.131972, -7.1825066, -7.4413238, -10.401382, -7.5643044, -8.608834, -8.239082, -6.799241, -8.035741, -5.7502255, -8.881622, -7.3571744, -9.315964, -7.925786, -7.7857537, -4.8930154, -8.529579, -7.633353, -8.817726, -7.47082, -8.291334, -4.683982, -4.170734, -6.193165, -7.8437185, -9.854808, -9.490823, -8.272433, -8.434413, -7.765057, -7.149798, -11.194118, -6.5116143, -11.112444, -9.999684, -10.689343, -9.942104, -9.520727, -7.440444, -2.531265, -3.7234814, -7.5839844, -4.550161, -3.031804, -4.616852, -8.832014, -6.0279136, -9.482858, -6.750441, -8.450063, -10.222086, -7.6301804, -7.559189, -10.234117, -6.999834, -7.1350074, -5.308107, -6.2450233, -8.8833885, -9.381562, -3.8812854, -8.868278, -9.988986]

现在我无法理解这些数字。根据我的阅读,前 5 个数字应该是:

Confidence、BBoxX、BBoxY、BBoxWidth、BBoxHeight,其余值是 labelMap 中每个类的概率

但这些数字在我看来完全不正确。置信度不应该在 0 到 1 之间吗?概率不也应该在 0 到 1 之间吗?得到这样的结果,我做错了什么?在解析这些结果之前所用的代码已经用其他类型的 tflite 文件充分测试过,应该没问题。这可能是由于准备输入像素缓冲区时使用了不正确的 imageMean 和 imageStd 吗?我不确定构建这个模型时使用了哪些值,所以我对这两个值都使用了 127.5。

标签: ios swift tensorflow tensorflow-lite

解决方案


推荐阅读