javascript - tfjs 用滑动窗口预测
问题描述
您好,我已经在 Keras 中创建并训练了一个检测系统,用于检测和分类仪表上的模拟数字。该系统基于两个神经网络:第一个网络检测窗口中是否有数字;如果有,第二个网络判断它是哪个数字。因为我知道数字在垂直方向上的位置,所以只需让窗口从左到右滑动即可搜索数字。
因为我正在做一个 Cordova 混合应用程序,所以这些神经网络被移植到 tensorflow.js。这个解决方案的效果出奇的好,但速度很慢。在我的电脑和 Sony Xperia XZ1 Compact 上大约需要 12 秒。
// Side length, in pixels, of the square crop that is handed to both networks.
const CROP_DIMENSION = 64

/**
 * Reads a multi-digit number from an image by sliding a window from left to
 * right. Every window is cropped, scaled to CROP_DIMENSION x CROP_DIMENSION,
 * converted to grayscale and passed to two models: `isNumberModel` decides
 * whether the window contains a digit, `numberClassifyModel` decides which one.
 * Consecutive digit windows form a segment; each segment contributes its most
 * frequent class to the result.
 *
 * NOTE(review): `tf` and `OCRNotInitializedError` are referenced but not
 * defined in this block; presumably they are imported elsewhere in the file.
 */
export default class NumberDetector {
  // Width of the sliding window, in percent of the image width.
  windowWidth = 7
  // Horizontal distance between two consecutive windows, in percent of the image width.
  windowMoveBy = 1
  // Height of the window, in percent of the image height. The window starts
  // (100 - windowHeight) percent below the top edge.
  windowHeight = 80
  isNumberModel = null
  numberClassifyModel = null
  isInitialized = false
  localeServerUrl = null

  /**
   * Runs the sliding-window detection over `image`.
   *
   * @param {CanvasImageSource} image - source accepted by `drawImage`; its
   *   `width` and `height` are used to size the window.
   * @returns {Promise<string>} the detected digits, concatenated left to right.
   * @throws {OCRNotInitializedError} when `init()` has not completed.
   */
  async run (image) {
    if (!this.isInitialized) {
      throw new OCRNotInitializedError()
    }

    const timestampStart = Date.now()

    // A non-finite step count (windowMoveBy === 0) means "no windows", never an endless loop.
    const rawSteps = Math.trunc(100 / this.windowMoveBy)
    const steps = Number.isFinite(rawSteps) ? rawSteps : 0

    // The window geometry does not depend on the loop index, so compute it once.
    const stepWidth = this.byPercentage(image.width, this.windowMoveBy)
    const cropWidth = this.byPercentage(image.width, this.windowWidth)
    const cropTop = this.byPercentage(image.height, 100 - this.windowHeight)
    const cropHeight = this.byPercentage(image.height, this.windowHeight)

    let finalNumber = ''
    let currentSegmentNumbers = []

    for (let i = 0; i < steps; i++) {
      const canvas = document.createElement('canvas')
      canvas.width = CROP_DIMENSION
      canvas.height = CROP_DIMENSION
      const context = canvas.getContext('2d')
      context.drawImage(image, i * stepWidth, cropTop, cropWidth, cropHeight, 0, 0, CROP_DIMENSION, CROP_DIMENSION)
      const rgba = context.getImageData(0, 0, CROP_DIMENSION, CROP_DIMENSION).data
      const grayImage = this.convertGrayImageArrayToImage(this.convertImageArrayToGray(rgba))

      // Add the batch dimension. The resulting tensor is not garbage collected
      // by tfjs, so it is released explicitly once both models are done with it.
      const input = tf.expandDims(grayImage, 0)
      try {
        if (await this.checkIfNumber(input)) {
          // NOTE(review): looks like a debugging aid (shows every crop that was
          // classified as a digit) - confirm before shipping.
          window.document.body.appendChild(canvas)
          const currentNumber = await this.classifyNumber(input)
          console.log(currentNumber)
          currentSegmentNumbers.push(currentNumber)
        } else {
          console.log('x')
          if (currentSegmentNumbers.length) {
            finalNumber += this.getMostOccurrenceNumber(currentSegmentNumbers)
          }
          currentSegmentNumbers = []
        }
      } finally {
        input.dispose()
      }
    }

    // A segment that is still open when the windows run out (digit touching the
    // right edge) would otherwise be lost.
    if (currentSegmentNumbers.length) {
      finalNumber += this.getMostOccurrenceNumber(currentSegmentNumbers)
    }

    console.log('finalNumber', finalNumber, Date.now() - timestampStart)
    return finalNumber
  }

  /**
   * Loads both models and marks the detector as ready.
   *
   * @param {string} [localeServerUrl] - optional base URL prepended to the model paths.
   * @returns {Promise<void>} resolves once both models are loaded.
   */
  async init (localeServerUrl) {
    this.localeServerUrl = localeServerUrl
    await Promise.all([
      this.loadIsNumberModel(),
      this.loadNumberClassifyModel()
    ])
    this.isInitialized = true
  }

  /**
   * Prefixes a model path with `localeServerUrl` when one is configured.
   *
   * @param {string} path - relative path of the model JSON.
   * @returns {string} the URL to load the model from.
   */
  resolveModelUrl (path) {
    return this.localeServerUrl ? `${this.localeServerUrl}/${path}` : path
  }

  /**
   * Loads the digit classifier into `numberClassifyModel`.
   *
   * NOTE(review): `tf.loadModel` was renamed to `tf.loadLayersModel` in
   * TensorFlow.js 1.0 - confirm which version this project pins.
   *
   * @returns {Promise<void>}
   */
  async loadNumberClassifyModel () {
    this.numberClassifyModel = await tf.loadModel(this.resolveModelUrl('models/numbers/model.json'))
  }

  /**
   * Loads the digit / no-digit detector into `isNumberModel`.
   *
   * @returns {Promise<void>}
   */
  async loadIsNumberModel () {
    this.isNumberModel = await tf.loadModel(this.resolveModelUrl('models/yon/model.json'))
  }

  /**
   * Converts canvas RGBA data to one gray value per pixel using a weighted
   * average of the colour channels (0.299 R + 0.587 G + 0.114 B); the alpha
   * channel is dropped.
   *
   * @param {ArrayLike<number>} imageArray - RGBA values, four per pixel.
   * @returns {number[]} gray values in the input's value range (not normalised).
   */
  convertImageArrayToGray (imageArray) {
    const pixelCount = Math.ceil(imageArray.length / 4)
    const grayScaleArray = new Array(pixelCount)
    for (let pixel = 0; pixel < pixelCount; pixel++) {
      const i = pixel * 4
      grayScaleArray[pixel] = (imageArray[i] * 0.299) + (imageArray[i + 1] * 0.587) + (imageArray[i + 2] * 0.114)
    }
    return grayScaleArray
  }

  /**
   * Reshapes a flat, row-major gray image into a nested array of shape
   * [CROP_DIMENSION][CROP_DIMENSION][1] and scales every value by 1 / 255.
   * Positions the input does not cover stay 0; values beyond
   * CROP_DIMENSION * CROP_DIMENSION are ignored.
   *
   * @param {ArrayLike<number>} imageArray - flat gray values, read row by row.
   * @returns {number[][][]} the image as rows x columns x one channel.
   */
  convertGrayImageArrayToImage (imageArray) {
    return Array.from({ length: CROP_DIMENSION }, (_, row) =>
      Array.from({ length: CROP_DIMENSION }, (_, column) => {
        const i = (row * CROP_DIMENSION) + column
        return [i < imageArray.length ? imageArray[i] / 255.0 : 0]
      })
    )
  }

  /**
   * Runs `model` on `input` and returns the index of its highest output. The
   * prediction tensor is disposed as soon as its values have been read.
   *
   * @param {{predict: Function}} model - a loaded model.
   * @param {*} input - batched input tensor.
   * @returns {Promise<number>} index of the largest output value.
   */
  async predictClassIndex (model, input) {
    const prediction = model.predict(input)
    try {
      return this.getIndexWithMaxValue(await prediction.data())
    } finally {
      prediction.dispose()
    }
  }

  /**
   * Asks `isNumberModel` whether the crop contains a digit.
   *
   * @param {*} croppedFloatArray - batched input tensor.
   * @returns {Promise<number>} index of the winning output; `run` treats any
   *   non-zero index as "digit present".
   */
  async checkIfNumber (croppedFloatArray) {
    return this.predictClassIndex(this.isNumberModel, croppedFloatArray)
  }

  /**
   * Asks `numberClassifyModel` which class the crop belongs to.
   *
   * @param {*} croppedFloatArray - batched input tensor.
   * @returns {Promise<number>} index of the winning output.
   */
  async classifyNumber (croppedFloatArray) {
    return this.predictClassIndex(this.numberClassifyModel, croppedFloatArray)
  }

  /**
   * Returns the value that occurs most often in `numbers`.
   *
   * @param {Iterable<number|string>} numbers - votes collected for one segment.
   * @returns {string|number} the winning value as an object key (string), or
   *   -1 when `numbers` is empty. Ties go to the key that comes first in
   *   `Object.entries` order (ascending for integer-like keys).
   */
  getMostOccurrenceNumber (numbers) {
    const occurrenceNumbers = {}
    for (const number of numbers) {
      occurrenceNumbers[number] = (occurrenceNumbers[number] || 0) + 1
    }
    let max = -1
    let maxKey = -1
    for (const [key, count] of Object.entries(occurrenceNumbers)) {
      if (count > max) {
        max = count
        maxKey = key
      }
    }
    return maxKey
  }

  /**
   * Returns `percentage` percent of `size`, truncated towards zero.
   *
   * @param {number} size
   * @param {number} percentage
   * @returns {number}
   */
  byPercentage (size, percentage) {
    // Math.trunc instead of parseInt: parseInt stringifies its argument first,
    // so very small or very large numbers ("3e-7") were parsed as their mantissa.
    return Math.trunc(size / 100 * percentage)
  }

  /**
   * Returns the index of the largest element (argmax).
   *
   * @param {ArrayLike<number>} array - plain or typed array of scores.
   * @returns {number} numeric index of the first maximum; 0 for an empty input.
   */
  getIndexWithMaxValue (array) {
    let indexOfTopVal = 0
    for (let index = 1; index < array.length; index++) {
      if (array[index] > array[indexOfTopVal]) {
        indexOfTopVal = index
      }
    }
    return indexOfTopVal
  }
}
我认为问题在于特征图被重复计算。我需要以某种方式把整张图像交给 tensorflow,并告诉它:先计算一次特征图,然后在其上应用滑动窗口。当前的解决方案大约需要 12 秒才能生成输出。我真的不知道该怎么做,有人能提供示例吗?
解决方案
推荐阅读
- javascript - ExtJS:重新选择相同的值不会触发选择事件
- java - 在java中拆分双引号逗号行
- linux - Shell Scripting - 读取属性文件和两个变量的加法(数学)
- android - 使用 AutoLink 时出现 Textview 问题
- php - 检查日期是否过去
- asp.net - 如何从 ApiController 访问现有的 WebMethod
- xml - Notepad++ XML 如何跨多个文件使用正则表达式标记电子邮件地址?
- bash - Bash | 主机名的 GREP 列表 | While 和 For 循环
- python - json.decoder.JSONDecodeError:额外数据:第 1 行第 427 列(字符 426)
- angular - Angular 5 - 从另一个组件更改变量