ios - 如何解释 YOLOv4 输出
问题描述
我试图在 iOS 上使用 TensorFlow Lite 解释 YOLOv4 模型的输出。
我已经阅读了一些关于输出张量中使用的 cells(单元格)和 anchors(锚点)概念的资料,我想为它实现解析器。我的模型的输出由 2 个张量组成:
float32[1,13,13,255]
float32[1,26,26,255]
我想知道的第一件事是:第二个输出中的内容是什么?根据我读到的资料,第一个输出应该已经包含所有信息。第二个只是更精细的结果,还是别的什么?
好的,让我们先从第一个输出开始。我编写了一个简单的函数,它应该提取所有单元格,然后从这些单元格中提取所有锚点数据。代码如下:
// Read the first output tensor of the model and reinterpret its raw bytes as
// 32-bit floats; bail out with nil when the byte count does not fit.
let output0 = try localModel.output(at: 0)
guard let output0Floats = [Float](bytes: output0.data) else { return nil }
// Grid side length — presumably matches the 13x13 dimensions of the first
// output tensor; confirm against the model.
let numberOfCells = 13
// Number of anchors stored in every grid cell.
let numberOfAnchors = 3
// Number of class scores stored for every anchor.
let numberOfClasses = 80
// Values per anchor: one score per class plus 5 additional leading values.
// With 3 anchors this gives 3 * 85 = 255 values per cell.
let anchorSize = (numberOfClasses + 5)
/// Returns the flat run of values belonging to the grid cell at column `x`,
/// row `y`.
///
/// Cells are read row by row from `output0Floats`; each cell occupies
/// `anchorSize * numberOfAnchors` consecutive values.
func cellAt(x: Int, y: Int) -> [Float] {
    let valuesPerCell = anchorSize * numberOfAnchors
    let cellIndex = y * numberOfCells + x
    let start = cellIndex * valuesPerCell
    let end = start + valuesPerCell
    return Array(output0Floats[start..<end])
}
/// Splits the values of one cell into `numberOfAnchors` consecutive chunks of
/// `anchorSize` values each, in order.
func anchors(in cell: [Float]) -> [[Float]] {
    var chunks: [[Float]] = []
    chunks.reserveCapacity(numberOfAnchors)
    for anchorIndex in 0..<numberOfAnchors {
        let start = anchorIndex * anchorSize
        let end = start + anchorSize
        chunks.append(Array(cell[start..<end]))
    }
    return chunks
}
// Walk the grid row by row and print the anchors found in every cell.
for row in 0..<numberOfCells {
    for column in 0..<numberOfCells {
        // Extract the cell before printing anything, so the header is only
        // emitted for cells that could actually be read.
        let cellValues = cellAt(x: column, y: row)
        let header = "Cell: \(column),\(row) contains anchors:"
        print(header)
        let cellAnchors = anchors(in: cellValues)
        print(cellAnchors)
    }
}
...
private extension Array {
    /// Creates an array by reinterpreting the raw bytes of `bytes` as
    /// consecutive `Element` values.
    ///
    /// The bytes are copied into the array's own storage instead of being
    /// rebound in place, so the initializer also works when `bytes` does not
    /// start at an address aligned for `Element` (for example a `Data` slice).
    ///
    /// Only meaningful for trivial element types such as `Float` or `Int32`,
    /// whose values can be formed from an arbitrary bit pattern.
    ///
    /// - Parameter bytes: The raw bytes to reinterpret.
    /// - Returns: `nil` when the byte count is not a whole multiple of
    ///   `MemoryLayout<Element>.stride`.
    init?(bytes: Data) {
        let stride = MemoryLayout<Element>.stride
        guard bytes.count % stride == 0 else { return nil }

        let elementCount = bytes.count / stride
        guard elementCount > 0 else {
            self = []
            return
        }

        self = Array(unsafeUninitializedCapacity: elementCount) { buffer, initializedCount in
            // A byte-wise copy carries no alignment requirement on the source.
            _ = bytes.copyBytes(to: buffer)
            initializedCount = elementCount
        }
    }
}
这是我为单个单元格获得的示例结果:
Cell: 7,12 contains anchors:
[[0.693655, -1.1966848, -0.007975042, -0.3327814, -9.583811, 0.3976263, -6.0192285, -6.329881, -5.8644676, -10.2914715, -9.632221, -8.071436, -6.399925, -5.240812, -8.791572, -5.6437893, -9.8603115, -10.492198, -1.9372412, -7.0640965, -2.6936512, -5.112247, -7.131972, -7.1825066, -7.4413238, -10.401382, -7.5643044, -8.608834, -8.239082, -6.799241, -8.035741, -5.7502255, -8.881622, -7.3571744, -9.315964, -7.925786, -7.7857537, -4.8930154, -8.529579, -7.633353, -8.817726, -7.47082, -8.291334, -4.683982, -4.170734, -6.193165, -7.8437185, -9.854808, -9.490823, -8.272433, -8.434413, -7.765057, -7.149798, -11.194118, -6.5116143, -11.112444, -9.999684, -10.689343, -9.942104, -9.520727, -7.440444, -2.531265, -3.7234814, -7.5839844, -4.550161, -3.031804, -4.616852, -8.832014, -6.0279136, -9.482858, -6.750441, -8.450063, -10.222086, -7.6301804, -7.559189, -10.234117, -6.999834, -7.1350074, -5.308107, -6.2450233, -8.8833885, -9.381562, -3.8812854, -8.868278, -9.988986], [0.4351927, -1.3958519, 0.46428338, -0.39240548, -8.170114, 0.7084342, -7.709829, -5.9856057, -6.808081, -10.644019, -9.912677, -7.3293757, -7.548369, -5.533275, -10.072926, -7.316476, -9.945337, -11.118561, -3.2463353, -10.561513, -5.067392, -7.312641, -8.729989, -9.5539055, -7.58917, -9.886164, -6.5404315, -8.553915, -9.023286, -9.580754, -6.7592535, -8.380334, -8.182065, -7.2239976, -9.276712, -7.5086412, -7.2454534, -7.139829, -8.614485, -7.8158274, -9.850543, -9.123642, -6.8081083, -6.936388, -7.997142, -8.845028, -11.322939, -10.713314, -9.629859, -10.820017, -10.480835, -9.071951, -7.9244685, -12.562474, -7.1654305, -13.456438, -10.116255, -12.255847, -11.530319, -10.3949375, -10.665162, -5.6975913, -4.050809, -10.665826, -2.638548, -3.5531735, -7.0320325, -10.047072, -7.678191, -10.290669, -7.438999, -7.531754, -9.817409, -8.428637, -9.502961, -10.955662, -8.6340065, -5.0168147, -8.593948, -9.412493, -10.816083, -10.903126, -8.81499, -10.449745, -9.069517], [0.025469145, -1.7808459, -0.18256505, 
-0.70104045, -10.450736, -0.67288893, -5.771856, -5.448979, -6.4159226, -8.777289, -7.960696, -5.3555217, -4.798117, -2.8378687, -7.9489646, -8.255625, -8.968552, -8.036578, -2.46956, -8.458385, -4.8979797, -6.5746903, -7.2408285, -8.574903, -6.8356185, -6.4320874, -6.037178, -7.56021, -7.275848, -8.808907, -3.9019513, -8.835796, -6.360187, -6.5461373, -7.1117754, -6.6027184, -7.280362, -7.1671834, -7.292713, -7.1488175, -7.1398635, -8.180893, -5.797153, -6.3417816, -6.9332256, -8.371075, -9.2042055, -8.602686, -8.072069, -8.1690035, -8.0164175, -6.61691, -6.3536263, -9.318304, -4.5542707, -10.049933, -7.8087454, -9.497473, -9.07455, -8.406244, -7.078502, -5.5775504, -2.3586287, -8.409487, -1.6716739, -3.8225765, -6.9020715, -6.6682305, -5.784493, -8.40492, -7.2747784, -6.392035, -6.4958863, -7.629692, -7.4995623, -8.4432125, -6.7565637, -3.113231, -7.3596015, -8.573539, -8.829562, -8.523581, -8.571439, -8.087017, -7.958835]]
所以单个锚看起来像这样:
[0.693655, -1.1966848, -0.007975042, -0.3327814, -9.583811, 0.3976263, -6.0192285, -6.329881, -5.8644676, -10.2914715, -9.632221, -8.071436, -6.399925, -5.240812, -8.791572, -5.6437893, -9.8603115, -10.492198, -1.9372412, -7.0640965, -2.6936512, -5.112247, -7.131972, -7.1825066, -7.4413238, -10.401382, -7.5643044, -8.608834, -8.239082, -6.799241, -8.035741, -5.7502255, -8.881622, -7.3571744, -9.315964, -7.925786, -7.7857537, -4.8930154, -8.529579, -7.633353, -8.817726, -7.47082, -8.291334, -4.683982, -4.170734, -6.193165, -7.8437185, -9.854808, -9.490823, -8.272433, -8.434413, -7.765057, -7.149798, -11.194118, -6.5116143, -11.112444, -9.999684, -10.689343, -9.942104, -9.520727, -7.440444, -2.531265, -3.7234814, -7.5839844, -4.550161, -3.031804, -4.616852, -8.832014, -6.0279136, -9.482858, -6.750441, -8.450063, -10.222086, -7.6301804, -7.559189, -10.234117, -6.999834, -7.1350074, -5.308107, -6.2450233, -8.8833885, -9.381562, -3.8812854, -8.868278, -9.988986]
现在我无法理解这些数字。根据我的阅读,前 5 个数字应该是:
Confidence、BBoxX、BBoxY、BBoxWidth、BBoxHeight,其余值是 labelMap 中每个类的概率。
但这些数字在我看来完全不正确。置信度不应该在 0 到 1 之间吗?概率不也应该在 0 到 1 之间吗?得到这样的结果,我哪里做错了?我在解析这些结果之前使用的代码已经用其他类型的 tflite 文件进行了充分测试,应该没问题。这会不会是因为在准备输入像素缓冲区时使用了错误的 imageMean 和 imageStd?
我不确定构建这个模型时使用了哪些值,所以我对这两个值都使用了 127.5。
解决方案
推荐阅读
- metabase - K6 元数据库发布请求正在获取 {"errors":{"database":"value must be an integer."}} 尽管 value 是 integer
- c# - 从 C# 中的排序数据表创建多个 csv
- postgresql - Postgres函数返回Select *来自多个表的结果
- cmake - 如何在 CMakeLists 中添加数据文件的路径?
- linux - 如何在bash脚本中删除回车
- php - 如何使用 mongodb\driver php 获得“local.oplog.rs”计数
- javascript - 如何在 JavaScript 中做基于斜率的物理
- python - 如何使用一行选择一个数组中没有出现在另一个数组中的最小值?
- typescript - Object.defineProperty(Array.prototype .. TypeScript 中的问题
- apache-spark - Datalab BigQuery 数据到 Dataproc Hadoop 字数