ios - How to stop detecting objects in Swift?
Problem Description
I am currently working on an iOS mobile project that detects objects in the camera frame and then converts them to speech, to help visually impaired people. My app already detects objects in the frame, but once something is detected it never stops being detected, and when I try to convert the object's name to speech it iterates over the same name again and again.

To clarify: when I point the camera at a "chair", it produces more than 100 log entries for the chair, and the text-to-speech has to say "chair" 100 times before it can move on to the next object.

Here is my viewController code:
import UIKit
import Vision
import CoreMedia
import AVFoundation

class ViewController: UIViewController {
    @IBOutlet weak var videoPreview: UIView!
    @IBOutlet weak var boxesView: DrawingBoundingBoxView!
    @IBOutlet weak var labelsTableView: UITableView!
    @IBOutlet weak var inferenceLabel: UILabel!
    @IBOutlet weak var etimeLabel: UILabel!
    @IBOutlet weak var fpsLabel: UILabel!

    let objectDectectionModel = MobileNetV2_SSDLite()

    // MARK: - Vision Properties
    var request: VNCoreMLRequest?
    var visionModel: VNCoreMLModel?
    var isInferencing = false

    // MARK: - AV Property
    var videoCapture: VideoCapture!
    let semaphore = DispatchSemaphore(value: 1)
    var lastExecution = Date()

    // MARK: - TableView Data
    var predictions: [VNRecognizedObjectObservation] = []

    // MARK: - Performance Measurement Property
    private let measure = Measure()
    let maf1 = MovingAverageFilter()
    let maf2 = MovingAverageFilter()
    let maf3 = MovingAverageFilter()

    // MARK: - View Controller Life Cycle
    override func viewDidLoad() {
        super.viewDidLoad()
        // setup the model
        setUpModel()
        // setup camera
        setUpCamera()
        // setup delegate for performance measurement
        measure.delegate = self
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        self.videoCapture.start()
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        self.videoCapture.stop()
    }

    // MARK: - Setup Core ML
    func setUpModel() {
        if let visionModel = try? VNCoreMLModel(for: objectDectectionModel.model) {
            self.visionModel = visionModel
            request = VNCoreMLRequest(model: visionModel, completionHandler: visionRequestDidComplete)
            request?.imageCropAndScaleOption = .scaleFill
        } else {
            fatalError("fail to create vision model")
        }
    }

    // MARK: - SetUp Video
    func setUpCamera() {
        videoCapture = VideoCapture()
        videoCapture.delegate = self
        videoCapture.fps = 30
        videoCapture.setUp(sessionPreset: .vga640x480) { success in
            if success {
                // add preview view on the layer
                if let previewLayer = self.videoCapture.previewLayer {
                    self.videoPreview.layer.addSublayer(previewLayer)
                    self.resizePreviewLayer()
                }
                // start video preview when setup is done
                self.videoCapture.start()
            }
        }
    }

    override func viewDidLayoutSubviews() {
        super.viewDidLayoutSubviews()
        resizePreviewLayer()
    }

    func resizePreviewLayer() {
        videoCapture.previewLayer?.frame = videoPreview.bounds
    }
}

// MARK: - VideoCaptureDelegate
extension ViewController: VideoCaptureDelegate {
    func videoCapture(_ capture: VideoCapture, didCaptureVideoFrame pixelBuffer: CVPixelBuffer?, timestamp: CMTime) {
        // the captured image from camera is contained on pixelBuffer
        if !self.isInferencing, let pixelBuffer = pixelBuffer {
            self.isInferencing = true
            // start of measure
            self.measure.start()
            // predict!
            self.predictUsingVision(pixelBuffer: pixelBuffer)
        }
    }
}

extension ViewController {
    func predictUsingVision(pixelBuffer: CVPixelBuffer) {
        guard let request = request else { fatalError() }
        // vision framework configures the input size of image following our model's input configuration automatically
        self.semaphore.wait()
        let handler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer)
        try? handler.perform([request])
    }

    // MARK: - Post-processing
    func visionRequestDidComplete(request: VNRequest, error: Error?) {
        self.measure.labell(with: "endInference")
        if let predictions = request.results as? [VNRecognizedObjectObservation] {
            // print(predictions.first?.labels.first?.identifier ?? "nil")
            // print(predictions.first?.labels.first?.confidence ?? -1)
            self.predictions = predictions
            DispatchQueue.main.async {
                self.boxesView.predictedObjects = predictions
                self.labelsTableView.reloadData()
                // end of measure
                self.measure.end()
                self.isInferencing = false
            }
        } else {
            // end of measure
            self.measure.end()
            self.isInferencing = false
        }
        self.semaphore.signal()
    }
}
extension ViewController: UITableViewDataSource {
    func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
        return predictions.count
    }

    func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
        guard let cell = tableView.dequeueReusableCell(withIdentifier: "InfoCell") else {
            return UITableViewCell()
        }
        // Getting the detected object and translating it to speech.
        // This is where I face the problem: the objects keep iterating over themselves.
        let result = predictions[indexPath.row].label ?? "N/A"
        // When trying to print(result) I get all the labels detected, but it does not stop.
        let utterance = AVSpeechUtterance(string: result)
        utterance.voice = AVSpeechSynthesisVoice(language: "en-GB")
        utterance.rate = 0.5
        let synthesizer = AVSpeechSynthesizer()
        synthesizer.speak(utterance)
        let rectString = predictions[indexPath.row].boundingBox.toString(digit: 2)
        let confidence = predictions[indexPath.row].labels.first?.confidence ?? -1
        let confidenceString = String(format: "%.3f", confidence)
        cell.textLabel?.text = predictions[indexPath.row].label ?? "N/A"
        cell.detailTextLabel?.text = "\(rectString), \(confidenceString)"
        return cell
    }
}
// MARK: - (Performance Measurement) Delegate
extension ViewController: MeasureDelegate {
    func updateMeasure(inferenceTime: Double, executionTime: Double, fps: Int) {
        //print(executionTime, fps)
        DispatchQueue.main.async {
            self.maf1.append(element: Int(inferenceTime*1000.0))
            self.maf2.append(element: Int(executionTime*1000.0))
            self.maf3.append(element: fps)
            self.inferenceLabel.text = "inference: \(self.maf1.averageValue) ms"
            self.etimeLabel.text = "execution: \(self.maf2.averageValue) ms"
            self.fpsLabel.text = "fps: \(self.maf3.averageValue)"
        }
    }
}

class MovingAverageFilter {
    private var arr: [Int] = []
    private let maxCount = 10

    public func append(element: Int) {
        arr.append(element)
        if arr.count > maxCount {
            arr.removeFirst()
        }
    }

    public var averageValue: Int {
        guard !arr.isEmpty else { return 0 }
        let sum = arr.reduce(0) { $0 + $1 }
        return Int(Double(sum) / Double(arr.count))
    }
}
Solution
It seems your tableView.reloadData() is called on every frame, because visionRequestDidComplete is called on every frame. As a result, cellForRowAtIndexPath (and the AVSpeechSynthesizer inside it) is invoked over and over, which is what produces the repeated speech.
You should re-evaluate whether you really need to update your table view that often. Perhaps you only need to update it when there are new observations? You could compare the incoming predictions with the previous ones in visionRequestDidComplete and only reload (and speak) when they actually change, as sketched below.
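A minimal sketch of that idea, assuming a new lastLabels property and a single long-lived speechSynthesizer property on the ViewController (both hypothetical names, not part of the original code):

var lastLabels: [String] = []
let speechSynthesizer = AVSpeechSynthesizer() // one shared instance instead of one per cell

func visionRequestDidComplete(request: VNRequest, error: Error?) {
    self.measure.labell(with: "endInference")
    if let predictions = request.results as? [VNRecognizedObjectObservation] {
        self.predictions = predictions
        // Collect the top label of each observation.
        let labels = predictions.compactMap { $0.labels.first?.identifier }
        DispatchQueue.main.async {
            // Only refresh the UI and speak when the detections actually changed.
            if labels != self.lastLabels {
                self.lastLabels = labels
                self.boxesView.predictedObjects = predictions
                self.labelsTableView.reloadData()
                // Speak the new set of objects once, without interrupting ongoing speech.
                if !labels.isEmpty && !self.speechSynthesizer.isSpeaking {
                    let utterance = AVSpeechUtterance(string: labels.joined(separator: ", "))
                    utterance.voice = AVSpeechSynthesisVoice(language: "en-GB")
                    utterance.rate = 0.5
                    self.speechSynthesizer.speak(utterance)
                }
            }
            self.measure.end()
            self.isInferencing = false
        }
    } else {
        self.measure.end()
        self.isInferencing = false
    }
    self.semaphore.signal()
}

Keeping the synthesizer as a property also matters: a synthesizer created locally inside cellForRowAt can be deallocated before it finishes speaking, and cell reuse means a new one is created on every reload.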
You might also want to use Apple's own VoiceOver system to read out UI elements. That is the standard way to add support for visually impaired users. It also has the benefit that the user can navigate the table view, and each cell's text will be read out accordingly.
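For the VoiceOver route, a rough sketch of cellForRowAt with no manual speech at all (reusing the "InfoCell" identifier from the question; the accessibility wording is just an example):

func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
    guard let cell = tableView.dequeueReusableCell(withIdentifier: "InfoCell") else {
        return UITableViewCell()
    }
    let prediction = predictions[indexPath.row]
    let name = prediction.labels.first?.identifier ?? "N/A"
    let confidence = prediction.labels.first?.confidence ?? -1
    cell.textLabel?.text = name
    cell.detailTextLabel?.text = String(format: "%.3f", confidence)
    // Let VoiceOver describe the row when the user focuses it; no AVSpeechSynthesizer needed.
    cell.isAccessibilityElement = true
    cell.accessibilityLabel = "\(name), confidence \(Int(confidence * 100)) percent"
    return cell
}

With VoiceOver enabled, the user swipes through the cells and each accessibilityLabel is spoken automatically, so speech is driven by the user's navigation rather than by every camera frame.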