首页 > 解决方案 > VNTrackObjectRequest 从前一帧返回被跟踪对象的位置

问题描述

我正在尝试使用VNTrackObjectRequest跟踪 QR 码,但我无法弄清楚如何将结果映射回前一帧中的 QR 码。

我期望在移动 QR 码的图像上执行请求后,跟踪请求提供的boundingBoxQR 码与提供的图像中的 QR 码相同,并且与第一张图像中 QR 码的位置不同,而是反之亦然。

我想我遗漏了一些导致它无法按预期工作的基本信息?

import Foundation
import XCTest
import Vision

final class ObjectTrackingTests: XCTestCase {
    func testQRCodeDisappearing() throws {
        // Any image with a QR code
        let imageWithQRCode = UIImage(named: "Tracked QR Code Position 1", in: Bundle(for: Self.self), compatibleWith: nil)!
        let pixelBufferWithQRCode = try imageWithQRCode.pixelBuffer()

        // Any image with the same QR code, but with the position shifted
        let imageWithMovedQRCode = UIImage(named: "Tracked QR Code Position 2", in: Bundle(for: Self.self), compatibleWith: nil)!
        let pixelBufferWithMovedQRCode = try imageWithMovedQRCode.pixelBuffer()

        // Any image without a QR code
        let imageWithoutQRCode = UIImage(named: "Tracked QR Code Off Screen", in: Bundle(for: Self.self), compatibleWith: nil)!
        let pixelBufferWithoutQRCode = try imageWithoutQRCode.pixelBuffer()

        let imageRequestHandler = VNSequenceRequestHandler()
        let barcodeRequest = VNDetectBarcodesRequest()

        // Check initial image with QR code

        try imageRequestHandler.perform([barcodeRequest], on: pixelBufferWithQRCode, orientation: .up)

        XCTAssertEqual(barcodeRequest.results?.count, 1)

        let detectedBarcode = try XCTUnwrap(barcodeRequest.results?.first)

        let trackRequest = VNTrackObjectRequest(detectedObjectObservation: detectedBarcode)
        trackRequest.trackingLevel = .accurate

        // Check image with QR code that has moved down

        try imageRequestHandler.perform([barcodeRequest, trackRequest], on: pixelBufferWithMovedQRCode, orientation: .up)

        XCTAssertEqual(barcodeRequest.results?.count, 1)

        let detectedMovedBarcode = try XCTUnwrap(barcodeRequest.results?.first)

        let trackedResult = try XCTUnwrap(trackRequest.results?.first as? VNDetectedObjectObservation)

        XCTAssertEqual(trackedResult.boundingBox, detectedMovedBarcode.boundingBox) // This fails
        XCTAssertNotEqual(trackedResult.boundingBox, detectedBarcode.boundingBox) // This fails

        // Check image without a QR code

        try imageRequestHandler.perform([barcodeRequest, trackRequest], on: pixelBufferWithoutQRCode, orientation: .up)

        XCTAssertTrue(barcodeRequest.results?.isEmpty ?? true)
        XCTAssertTrue(trackRequest.results?.isEmpty ?? true) // This fails
    }
}

// This extension is required for the tests and is provided to make running this example easier.
extension UIImage {
    fileprivate enum PixelBufferConversionError: Error {
        case bufferCreationFailure(result: CVReturn)
    }

    fileprivate func pixelBuffer() throws -> CVPixelBuffer {
        var pixelBuffer: CVPixelBuffer!
        let width = Int(size.width)
        let height = Int(size.height)
        let createBufferResult = CVPixelBufferCreate(
            kCFAllocatorDefault,
            width,
            height,
            kCVPixelFormatType_32ARGB,
            [
                kCVPixelBufferCGImageCompatibilityKey: kCFBooleanTrue,
                kCVPixelBufferCGBitmapContextCompatibilityKey: kCFBooleanTrue
            ] as CFDictionary,
            &pixelBuffer
        )
        guard createBufferResult == kCVReturnSuccess else {
            throw PixelBufferConversionError.bufferCreationFailure(result: createBufferResult)
        }

        CVPixelBufferLockBaseAddress(pixelBuffer, CVPixelBufferLockFlags(rawValue: 0))
        defer {
            CVPixelBufferUnlockBaseAddress(pixelBuffer, CVPixelBufferLockFlags(rawValue: 0))
        }
        let pixelData = CVPixelBufferGetBaseAddress(pixelBuffer)

        let context = CGContext(
            data: pixelData,
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: CVPixelBufferGetBytesPerRow(pixelBuffer),
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.noneSkipFirst.rawValue
        )!

        context.translateBy(x: 0, y: size.height)
        context.scaleBy(x: 1.0, y: -1.0)

        UIGraphicsPushContext(context)
        draw(in: CGRect(x: 0, y: 0, width: size.width, height: size.height))
        UIGraphicsPopContext()

        return pixelBuffer
    }
}

使用的图像:

标签: iosswiftios-vision

解决方案


推荐阅读