首页 > 解决方案 > tfjs 用滑动窗口预测

问题描述

您好,我已经在 Keras 中创建并训练了一个检测系统,用于检测并分类仪表上的模拟数字。该系统基于两个神经网络:第一个网络检测是否有数字;如果有,第二个网络告诉我它是什么数字。因为我知道数字在垂直方向上的位置,所以搜索数字的过程是通过从左到右移动滑动窗口来完成的。

因为我正在做一个 Cordova 混合应用程序,所以这些神经网络被移植到 tensorflow.js。这个解决方案的效果出奇的好,但速度很慢。在我的电脑和 Sony Xperia XZ1 Compact 上大约需要 12 秒。

/** Side length, in pixels, of the square crop that is fed to both models. */
const CROP_DIMENSION = 64


/**
 * Reads a multi-digit number from an image by sliding a narrow window from
 * left to right. Every window is cropped, scaled to
 * CROP_DIMENSION x CROP_DIMENSION, converted to grayscale and passed to two
 * models: `isNumberModel` decides whether the window shows a digit and
 * `numberClassifyModel` decides which one. Consecutive "digit" windows form a
 * segment; each segment contributes its most frequent digit to the result.
 *
 * Relies on `tf` (TensorFlow.js), `OCRNotInitializedError` and a browser DOM
 * being available in the surrounding module/environment.
 */
export default class NumberDetector {

    // Window geometry; each value is a percentage of the image width/height.
    windowWidth = 7
    windowMoveBy = 1
    windowHeight = 80
    // Models are populated by init().
    isNumberModel = null
    numberClassifyModel = null
    isInitialized = false
    // Optional base URL that the model paths are resolved against.
    localeServerUrl = null

    /**
     * Runs the sliding-window detection over `image`.
     *
     * @param {CanvasImageSource} image - Source accepted by `drawImage`; its
     *     `width` and `height` are read to size the window.
     * @returns {Promise<string>} The detected digits, concatenated left to right.
     * @throws {OCRNotInitializedError} When init() has not completed.
     */
    async run (image) {
        if (!this.isInitialized) {
            throw new OCRNotInitializedError()
        }
        const startedAt = Date.now()

        // A zero step yields a non-finite count; treat it as "no windows"
        // instead of looping forever.
        const rawSteps = 100 / this.windowMoveBy
        const steps = Number.isFinite(rawSteps) ? Math.trunc(rawSteps) : 0

        // The crop size and vertical offset do not depend on the window index.
        const cropWidth = this.byPercentage(image.width, this.windowWidth)
        const cropTop = this.byPercentage(image.height, 100 - this.windowHeight)
        const cropHeight = this.byPercentage(image.height, this.windowHeight)

        let finalNumber = ''
        let currentSegmentNumbers = []
        for (let i = 0; i < steps; i++) {
            // Derive the offset from the accumulated percentage. Truncating
            // the per-step pixel size first and multiplying afterwards would
            // stop short of the right edge (or never move on narrow images).
            const cropLeft = this.byPercentage(image.width, i * this.windowMoveBy)

            const canvas = document.createElement('canvas')
            canvas.width = CROP_DIMENSION
            canvas.height = CROP_DIMENSION
            const context = canvas.getContext('2d')
            context.drawImage(image, cropLeft, cropTop, cropWidth, cropHeight, 0, 0, CROP_DIMENSION, CROP_DIMENSION)
            const rgba = context.getImageData(0, 0, CROP_DIMENSION, CROP_DIMENSION).data
            const grayImage = this.convertGrayImageArrayToImage(this.convertImageArrayToGray(rgba))

            // Add a leading batch axis; the tensor must be released explicitly.
            const input = tf.expandDims(grayImage, 0)
            try {
                if (await this.checkIfNumber(input)) {
                    // NOTE(review): looks like a debugging aid - every accepted
                    // crop is appended to the page; confirm callers want this.
                    window.document.body.appendChild(canvas)
                    const currentNumber = await this.classifyNumber(input)
                    console.log(currentNumber)
                    currentSegmentNumbers.push(currentNumber)
                } else {
                    console.log('x')
                    if (currentSegmentNumbers.length) {
                        finalNumber += this.getMostOccurrenceNumber(currentSegmentNumbers)
                    }
                    currentSegmentNumbers = []
                }
            } finally {
                input.dispose()
            }
        }
        // Flush a segment that is still open when the sweep ends; otherwise a
        // digit touching the last window would be lost.
        if (currentSegmentNumbers.length) {
            finalNumber += this.getMostOccurrenceNumber(currentSegmentNumbers)
        }

        console.log('finalNumber', finalNumber, Date.now() - startedAt)
        return finalNumber
    }

    /**
     * Loads both models and marks the detector as initialized.
     *
     * @param {string} [localeServerUrl] - Optional base URL for the model files.
     * @returns {Promise<void>} Resolves once both models are loaded; rejects
     *     if either load fails (isInitialized then stays false).
     */
    init (localeServerUrl) {
        this.localeServerUrl = localeServerUrl
        return Promise.all([
            this.loadIsNumberModel(),
            this.loadNumberClassifyModel()
        ]).then(() => {
            this.isInitialized = true
        })
    }

    /**
     * Prefixes `relativePath` with `localeServerUrl` when one is configured.
     * Trailing slashes on the base are dropped so the result never has `//`.
     *
     * @param {string} relativePath - Model path relative to the base.
     * @returns {string} The URL to load.
     */
    resolveModelUrl (relativePath) {
        if (!this.localeServerUrl) {
            return relativePath
        }
        const base = String(this.localeServerUrl).replace(/\/+$/, '')
        return `${base}/${relativePath}`
    }

    /**
     * Loads the digit classifier into `numberClassifyModel`.
     *
     * @returns {Promise<void>}
     */
    loadNumberClassifyModel () {
        return tf.loadModel(this.resolveModelUrl('models/numbers/model.json'))
            .then((model) => {
                this.numberClassifyModel = model
            })
    }

    /**
     * Loads the digit / no-digit model into `isNumberModel`.
     *
     * @returns {Promise<void>}
     */
    loadIsNumberModel () {
        return tf.loadModel(this.resolveModelUrl('models/yon/model.json'))
            .then((model) => {
                this.isNumberModel = model
            })
    }

    /**
     * Converts flat RGBA pixel data to one gray value per pixel.
     * Uses the weights 0.299 / 0.587 / 0.114 for R / G / B (the ITU-R BT.601
     * luma coefficients) and ignores the alpha channel.
     *
     * @param {ArrayLike<number>} imageArray - RGBA values, four per pixel.
     * @returns {number[]} Gray values in the same pixel order.
     */
    convertImageArrayToGray (imageArray) {
        const grayScaleArray = []
        for (let offset = 0; offset < imageArray.length; offset += 4) {
            const red = imageArray[offset]
            const green = imageArray[offset + 1]
            const blue = imageArray[offset + 2]
            grayScaleArray.push((red * 0.299) + (green * 0.587) + (blue * 0.114))
        }
        return grayScaleArray
    }

    /**
     * Reshapes a flat, row-major gray array into a nested
     * [CROP_DIMENSION][CROP_DIMENSION][1] array, indexed as
     * image[row][column][0], with every value divided by 255.
     * Positions not covered by a shorter input stay 0.
     *
     * @param {ArrayLike<number>} imageArray - Flat gray values, row by row.
     * @returns {number[][][]} The nested image.
     */
    convertGrayImageArrayToImage (imageArray) {
        const image = Array.from({ length: CROP_DIMENSION }, () =>
            Array.from({ length: CROP_DIMENSION }, () => [0]))
        for (let i = 0; i < imageArray.length; i++) {
            const row = Math.floor(i / CROP_DIMENSION)
            const column = i % CROP_DIMENSION
            image[row][column][0] = imageArray[i] / 255.0
        }
        return image
    }

    /**
     * Runs `isNumberModel` on one window.
     *
     * @param {*} croppedFloatArray - Input handed to `predict` unchanged.
     * @returns {Promise<number>} Index of the highest-scoring output. run()
     *     treats any non-zero index as "digit present" - presumably class 0
     *     means "no digit"; verify against the trained model.
     */
    async checkIfNumber (croppedFloatArray) {
        const result = this.isNumberModel.predict(croppedFloatArray)
        try {
            return this.getIndexWithMaxValue(await result.data())
        } finally {
            // Prediction outputs are not garbage collected; free them here.
            result.dispose()
        }
    }

    /**
     * Runs `numberClassifyModel` on one window.
     *
     * @param {*} croppedFloatArray - Input handed to `predict` unchanged.
     * @returns {Promise<number>} Index of the highest-scoring output.
     */
    async classifyNumber (croppedFloatArray) {
        const result = this.numberClassifyModel.predict(croppedFloatArray)
        try {
            return this.getIndexWithMaxValue(await result.data())
        } finally {
            result.dispose()
        }
    }

    /**
     * Finds the value that occurs most often in `numbers`.
     * Values are counted by their string form. On a tie, integer-like values
     * win in ascending numeric order (the key order of a plain object).
     *
     * @param {Iterable<number|string>} numbers - Values to count.
     * @returns {string|number} The winning value as a string, or the number
     *     -1 when `numbers` is empty.
     */
    getMostOccurrenceNumber (numbers) {
        // Null prototype: inherited keys can never be mistaken for counts.
        const counts = Object.create(null)
        for (const number of numbers) {
            counts[number] = (counts[number] || 0) + 1
        }
        let bestCount = -1
        let bestKey = -1
        for (const [key, count] of Object.entries(counts)) {
            if (count > bestCount) {
                bestCount = count
                bestKey = key
            }
        }
        return bestKey
    }

    /**
     * Returns `percentage` percent of `size`, truncated toward zero.
     *
     * @param {number} size - Full size.
     * @param {number} percentage - Share of `size`, in percent.
     * @returns {number} The truncated share.
     */
    byPercentage (size, percentage) {
        // Math.trunc instead of parseInt: parseInt stringifies its argument
        // and misreads exponent notation (parseInt(1e-7) is 1).
        return Math.trunc(size / 100 * percentage)
    }

    /**
     * Returns the position of the largest element (argmax).
     *
     * @param {ArrayLike<number>} array - Values to scan.
     * @returns {number} Numeric index of the first largest element, or 0 for
     *     an empty input.
     */
    getIndexWithMaxValue (array) {
        // Start below every possible value so all-negative inputs work too.
        let topVal = -Infinity
        let indexOfTopVal = 0
        for (let index = 0; index < array.length; index++) {
            if (array[index] > topVal) {
                topVal = array[index]
                indexOfTopVal = index
            }
        }
        return indexOfTopVal
    }
}

我认为问题在于特征图被重复计算。我需要以某种方式把整张图像交给 tensorflow,并告诉它:先计算一次特征图,然后再应用滑动窗口。当前的解决方案大约需要 12 秒才能生成输出。我真的不知道该怎么做。有人可以提供示例吗?

标签: javascript tensorflow keras sliding-window tensorflow.js

解决方案


推荐阅读