android - 在 Android CameraX 预览中获取框内的所有文本
问题描述
我想分析相机预览中位于框内的所有文本,但是得到的文本坐标是错误的。
文本是否位于框内,是通过 contains 来判断的。
/**
 * Activity from the question: binds a CameraX preview and an image-analysis use case,
 * runs text recognition on every analyzed frame and prints each text element whose
 * scaled top-left corner lies inside [rectCrop].
 */
class TestsPhotoscan : AppCompatActivity() {
private lateinit var binding: ActivityMainBinding
// View-size / image-size ratios, recomputed for every frame in setScaleFactor().
private var scaleX = 1F
private var scaleY = 1F
// Rectangle of borderView in screen coordinates, captured once after the first layout pass.
private var rectCrop = Rect()
private var cameraProvider: ProcessCameraProvider? = null
private lateinit var cameraProviderListenableFuture: ListenableFuture<ProcessCameraProvider>
/** Inflates the layout, starts the camera (or asks for permission) and records [rectCrop]. */
override fun onCreate(savedInstanceState: Bundle?) {
super.onCreate(savedInstanceState)
binding = ActivityMainBinding.inflate(layoutInflater)
setContentView(binding.root)
cameraProviderListenableFuture = ProcessCameraProvider.getInstance(this)
// Request camera permissions
if (allPermissionsGranted()) {
startCamera()
} else {
ActivityCompat.requestPermissions(
this,
REQUIRED_PERMISSIONS,
REQUEST_CODE_PERMISSIONS
)
}
// One-shot layout listener: borderView has a size and position only after layout.
binding.borderView.viewTreeObserver.addOnGlobalLayoutListener(object :
ViewTreeObserver.OnGlobalLayoutListener {
override fun onGlobalLayout() {
binding.borderView.viewTreeObserver.removeOnGlobalLayoutListener(this)
val points = IntArray(2)
binding.borderView.getLocationOnScreen(points)
rectCrop = Rect(
points[0],
points[1],
points[0] + binding.borderView.width,
points[1] + binding.borderView.height
)
}
})
}
/** Returns true when every permission in REQUIRED_PERMISSIONS has been granted. */
private fun allPermissionsGranted() = REQUIRED_PERMISSIONS.all {
ContextCompat.checkSelfPermission(
this, it
) == PackageManager.PERMISSION_GRANTED
}
/** Waits for the camera provider, stores it and schedules [setupCamera] on the viewFinder. */
@SuppressLint("UnsafeExperimentalUsageError")
private fun startCamera() {
cameraProviderListenableFuture.addListener(Runnable {
cameraProvider = cameraProviderListenableFuture.get()
binding.viewFinder.post { setupCamera() }
}, ContextCompat.getMainExecutor(this))
}
/** Builds the preview use case targeting the display's size and rotation. */
private fun buildPreviewUseCase(): Preview {
val display = binding.viewFinder.display
val metrics = DisplayMetrics().also { display.getMetrics(it) }
val preview = Preview.Builder()
.setTargetRotation(display.rotation)
.setTargetResolution(Size(metrics.widthPixels, metrics.heightPixels))
.build()
.apply {
setSurfaceProvider(binding.viewFinder.surfaceProvider)
}
return preview
}
/** Binds the preview and analysis use cases to the back camera for this activity's lifecycle. */
private fun setupCamera() {
cameraProviderListenableFuture.addListener({
// Preview
val preview = buildPreviewUseCase()
// Every analyzed frame is handed to processImage() on the main executor.
val imageAnalyzer = ImageAnalysis.Builder()
.build()
.also {
it.setAnalyzer(ContextCompat.getMainExecutor(this),
{ processImage(it) })
}
// Select back camera as a default
val cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA
val useCaseGroup = UseCaseGroup.Builder()
.addUseCase(preview)
.addUseCase(imageAnalyzer)
.build()
try {
// Unbind use cases before rebinding
cameraProvider?.unbindAll()
// Bind use cases to camera
cameraProvider?.bindToLifecycle(
this, cameraSelector, useCaseGroup
)
} catch (exc: Exception) {
Log.e(TAG, "Use case binding failed", exc)
}
}, ContextCompat.getMainExecutor(this))
}
/**
 * Updates the scale factors for this frame, runs text recognition on it and closes the
 * frame once recognition completes.
 */
@SuppressLint("UnsafeOptInUsageError")
private fun processImage(imageProxy: ImageProxy) {
setScaleFactor(imageProxy)
recognizeText(
InputImage. fromMediaImage(
// `!!` throws a NullPointerException if the proxy carries no media image.
imageProxy.image!!,
imageProxy.imageInfo.rotationDegrees
)
).addOnCompleteListener { imageProxy.close() }
}
/**
 * Stores viewFinder-size / frame-size ratios in [scaleX] and [scaleY].
 *
 * NOTE(review): the frame's raw width and height are used; rotationDegrees is not
 * considered here, so the axes may be swapped for a rotated frame - confirm.
 */
private fun setScaleFactor(imageProxy: ImageProxy) {
val viewWidth = binding.viewFinder.width.toFloat()
val viewHeight = binding.viewFinder.height.toFloat()
val imageWidth = imageProxy.width.toFloat()
val imageHeight = imageProxy.height
scaleX = viewWidth / imageWidth
scaleY = viewHeight / imageHeight
}
/**
 * Runs text recognition on [image] and prints every element whose scaled top-left
 * corner is inside [rectCrop].
 *
 * NOTE(review): [rectCrop] comes from getLocationOnScreen() while the tested point is
 * only scaled, not offset; the two presumably match only when the viewFinder starts at
 * the screen origin and shows the whole frame uncropped - confirm.
 */
private fun recognizeText(image: InputImage): Task<Text> {
val recognizer = TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
return recognizer.process(image)
.addOnSuccessListener(
ScopedExecutor(TaskExecutors.MAIN_THREAD),
OnSuccessListener<Text> {
for (block in it.textBlocks) {
for (line in block.lines) {
for (element in line.elements) {
// A missing bounding box is replaced by -1 before scaling.
if (rectCrop.contains(
translateX(element.boundingBox?.left ?: -1).roundToInt(),
translateY(element.boundingBox?.top ?: -1).roundToInt()
)
) {
println(element.text)
}
}
}
}
})
}
/** Starts the camera once the permission request succeeds, otherwise shows a toast. */
override fun onRequestPermissionsResult(
requestCode: Int,
permissions: Array<out String>,
grantResults: IntArray
) {
super.onRequestPermissionsResult(requestCode, permissions, grantResults)
if (requestCode == REQUEST_CODE_PERMISSIONS) {
if (allPermissionsGranted()) {
startCamera()
} else {
Toast.makeText(
this,
"Permissions not granted by the user.",
Toast.LENGTH_SHORT
).show()
// finish()
}
return
}
}
companion object {
private const val TAG = "Mytag"
private const val REQUEST_CODE_PERMISSIONS = 10
private val REQUIRED_PERMISSIONS = arrayOf(Manifest.permission.CAMERA)
}
/** Scales an image x coordinate by [scaleX]. */
fun translateX(x: Int) =
x * scaleX
/** Scales an image y coordinate by [scaleY]. */
fun translateY(y: Int) = y * scaleY
}
和布局
<?xml version="1.0" encoding="utf-8"?>
<!-- Question layout: a full-size camera preview with a fixed-height border view drawn over it. -->
<layout xmlns:app="http://schemas.android.com/apk/res-auto">
<androidx.constraintlayout.widget.ConstraintLayout
xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent"
android:id="@+id/root"
android:layout_height="match_parent">
<!-- Camera preview; 0dp on both axes plus the four parent constraints makes it fill the parent. -->
<androidx.camera.view.PreviewView
android:id="@+id/viewFinder"
android:layout_width="0dp"
android:layout_height="0dp"
app:layout_constraintBottom_toBottomOf="parent"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toTopOf="parent" />
<!-- The frame the user aims at; the activity reads its on-screen rectangle as the crop region. -->
<View
android:id="@+id/border_view"
android:layout_width="match_parent"
android:layout_height="250dp"
android:layout_margin="16dp"
android:background="@drawable/background_drawable"
app:layout_constraintBottom_toBottomOf="@+id/viewFinder"
app:layout_constraintEnd_toEndOf="parent"
app:layout_constraintStart_toStartOf="parent"
app:layout_constraintTop_toTopOf="parent" />
</androidx.constraintlayout.widget.ConstraintLayout>
</layout>
解决方案
您遇到的困难在于如何把 ImageProxy 中的图像准确地映射到 PreviewView 所显示的图像。这听起来很容易,但我认为并不存在直接完成这种映射的方法。请参阅类似问题的答案。我逐一尝试实现了该答案中的每条建议,它们在某些情况下有效,但在另一些情况下却会失败。当然,也可能是我采取的方法有误。
我得出的结论是,提取和分析从预览区域提取的位图并识别那些完全被红色矩形包围的单词是最简单的。我用它们自己的红色矩形框住这些词,以表明它们已被正确识别。
以下是重新设计的 Activity、一个用于绘制单词框的图形叠加层视图,以及布局 XML。说明见代码中的注释。祝你好运!
TestsPhotoscan.kt
/**
 * Shows a CameraX preview and outlines every recognized word that lies completely inside
 * the word fence view.
 *
 * Text recognition runs on the bitmap exposed by the PreviewView instead of on the
 * analyzer's [ImageProxy] frame; the resulting bounding boxes are compared directly with
 * the word fence's rectangle in its parent, so no image-to-view mapping is performed.
 */
class TestsPhotoscan : AppCompatActivity() {
    private lateinit var binding: ActivityMainBinding
    private var wordFenceRect = Rect()
    private var cameraProvider: ProcessCameraProvider? = null
    private lateinit var cameraProviderListenableFuture: ListenableFuture<ProcessCameraProvider>

    // A single recognizer is shared by all frames instead of creating one per frame.
    // The Lazy holder is kept so onDestroy() can tell whether it was ever created.
    private val recognizerHolder = lazy {
        TextRecognition.getClient(TextRecognizerOptions.DEFAULT_OPTIONS)
    }
    private val textRecognizer by recognizerHolder

    /** Inflates the layout and starts the camera, asking for the permission first if needed. */
    override fun onCreate(savedInstanceState: Bundle?) {
        super.onCreate(savedInstanceState)
        binding = ActivityMainBinding.inflate(layoutInflater)
        setContentView(binding.root)
        cameraProviderListenableFuture = ProcessCameraProvider.getInstance(this)
        // Request camera permissions
        if (allPermissionsGranted()) {
            startCamera()
        } else {
            ActivityCompat.requestPermissions(
                this,
                REQUIRED_PERMISSIONS,
                REQUEST_CODE_PERMISSIONS
            )
        }
    }

    /** Releases the shared text recognizer if it was ever created. */
    override fun onDestroy() {
        if (recognizerHolder.isInitialized()) {
            textRecognizer.close()
        }
        super.onDestroy()
    }

    /** Returns true when every permission in REQUIRED_PERMISSIONS has been granted. */
    private fun allPermissionsGranted() = REQUIRED_PERMISSIONS.all {
        ContextCompat.checkSelfPermission(
            this, it
        ) == PackageManager.PERMISSION_GRANTED
    }

    /** Waits for the camera provider, stores it and schedules [setupCamera] on the viewFinder. */
    @SuppressLint("UnsafeExperimentalUsageError")
    private fun startCamera() {
        cameraProviderListenableFuture.addListener({
            cameraProvider = cameraProviderListenableFuture.get()
            binding.viewFinder.post { setupCamera() }
        }, ContextCompat.getMainExecutor(this))
    }

    /** Builds the preview use case targeting the display's real size and rotation. */
    private fun buildPreviewUseCase(): Preview {
        val display = binding.viewFinder.display
        val metrics = DisplayMetrics().also { display.getRealMetrics(it) }
        val rotation = display.rotation
        return Preview.Builder()
            .setTargetResolution(Size(metrics.widthPixels, metrics.heightPixels))
            .setTargetRotation(rotation)
            .build()
            .apply {
                setSurfaceProvider(binding.viewFinder.surfaceProvider)
            }
    }

    /** Binds the preview and analysis use cases to the back camera for this activity's lifecycle. */
    @SuppressLint("UnsafeOptInUsageError")
    private fun setupCamera() {
        cameraProviderListenableFuture.addListener({
            // Preview
            val preview = buildPreviewUseCase()
            // Every analyzed frame is handed to processImage() on the main executor.
            val imageAnalyzer = ImageAnalysis.Builder()
                .build()
                .also { analysis ->
                    analysis.setAnalyzer(ContextCompat.getMainExecutor(this)) { frame ->
                        processImage(frame)
                    }
                }
            // Select back camera as a default
            val cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA
            val useCaseGroup = UseCaseGroup.Builder()
                .addUseCase(preview)
                .addUseCase(imageAnalyzer)
                .build()
            try {
                // Unbind use cases before rebinding
                cameraProvider?.unbindAll()
                // Bind use cases to camera
                cameraProvider?.bindToLifecycle(
                    this, cameraSelector, useCaseGroup
                )
            } catch (exc: Exception) {
                Log.e(TAG, "Use case binding failed", exc)
            }
        }, ContextCompat.getMainExecutor(this))
    }

    /**
     * Handles one analyzer frame: optionally mirrors it into the inset view, then runs text
     * recognition on the preview's current bitmap.
     *
     * The frame is closed on every path. The analyzer is not handed further frames while
     * one is left open, so a skipped close would stall recognition.
     */
    @SuppressLint("UnsafeOptInUsageError")
    private fun processImage(imageProxy: ImageProxy) {
        // This code will display the image available in the ImageProxy within an inset view
        // if the inset view is visible to the user.
        //
        // The source for ImageUtils is at
        // https://github.com/googlesamples/mlkit/blob/master/android/translate-showcase/app/src/main/java/com/google/mlkit/showcase/translate/util/ImageUtils.kt
        if (binding.insetView.visibility == View.VISIBLE) {
            // A frame without a media image is simply not mirrored instead of crashing.
            imageProxy.image?.let { mediaImage ->
                val upright = rotateBitmap(
                    ImageUtils.convertYuv420888ImageToBitmap(mediaImage),
                    imageProxy.imageInfo.rotationDegrees.toFloat()
                )
                binding.insetView.setImageBitmap(upright)
            }
        }
        // PreviewViews allow access to a bitmap representation of what the preview shows. This is
        // just a whole lot easier than mapping the ImageProxy image to what the PreviewView
        // displays on the screen. See https://stackoverflow.com/a/63912198/6287910
        val previewBitmap = binding.viewFinder.bitmap
        if (previewBitmap == null) {
            // Nothing to analyze yet; release the frame so the next one can be delivered.
            imageProxy.close()
            return
        }
        recognizeText(InputImage.fromBitmap(previewBitmap, 0))
            .addOnCompleteListener { imageProxy.close() }
    }

    /**
     * Starts text recognition on [image] and, on success, redraws the word outlines.
     *
     * @return the recognition task, so callers can attach their own completion listeners.
     */
    private fun recognizeText(image: InputImage): Task<Text> {
        return textRecognizer.process(image)
            .addOnSuccessListener(ScopedExecutor(TaskExecutors.MAIN_THREAD)) { result ->
                outlineWordsInsideFence(result)
            }
    }

    /** Replaces the word fence's boxes with those of the words fully inside the fence. */
    private fun outlineWordsInsideFence(text: Text) {
        val fence = binding.wordFence
        fence.clearBoxes()
        fence.getHitRect(wordFenceRect)
        text.textBlocks
            .flatMap { it.lines }
            .flatMap { it.elements }
            .mapNotNull { it.boundingBox }
            // For each word, check to make sure that the entire word is
            // contained with the word fence.
            .filter { isRectWithinRect(it, wordFenceRect) }
            .forEach { wordBox ->
                // Change the box boundary from the coordinate system of the
                // parent to the coordinates of the word fence.
                fence.addBox(Rect(wordBox).apply { offset(-fence.left, -fence.top) })
            }
        fence.invalidate()
    }

    /** Starts the camera once the permission request succeeds, otherwise shows a toast. */
    override fun onRequestPermissionsResult(
        requestCode: Int,
        permissions: Array<out String>,
        grantResults: IntArray
    ) {
        super.onRequestPermissionsResult(requestCode, permissions, grantResults)
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera()
            } else {
                Toast.makeText(
                    this,
                    "Permissions not granted by the user.",
                    Toast.LENGTH_SHORT
                ).show()
                // finish()
            }
            return
        }
    }

    /** Returns true when [enclosedRect] is non-null and lies entirely inside [enclosingRect]. */
    private fun isRectWithinRect(enclosedRect: Rect?, enclosingRect: Rect) =
        enclosedRect != null && enclosingRect.contains(enclosedRect)

    /** Returns a copy of [bitmap] rotated by [rotation] degrees. */
    private fun rotateBitmap(bitmap: Bitmap, rotation: Float) =
        Matrix().run {
            preRotate(rotation)
            Bitmap.createBitmap(
                bitmap, 0, 0, bitmap.width, bitmap.height, this, true
            )
        }

    companion object {
        private const val TAG = "Applog"
        private const val REQUEST_CODE_PERMISSIONS = 10
        private val REQUIRED_PERMISSIONS = arrayOf(Manifest.permission.CAMERA)
    }
}
BoxedWordView.kt
/**
 * Overlay view that draws a red outline for every box handed to [addBox].
 *
 * Boxes are drawn as given, i.e. in this view's own coordinate system. Neither [addBox]
 * nor [clearBoxes] triggers a redraw; the caller invalidates the view after updating the
 * set of boxes.
 */
class BoxedWordView @JvmOverloads constructor(
    context: Context, attrs: AttributeSet? = null, defStyleAttr: Int = 0
) : View(context, attrs, defStyleAttr) {
    private val boxes = mutableListOf<Rect>()
    private val outlinePaint = Paint().apply {
        strokeWidth = 2f
        color = context.resources.getColor(android.R.color.holo_red_light)
        style = Paint.Style.STROKE
    }

    /**
     * Draws all current boxes on top of the view's background.
     *
     * onDraw is used rather than onDrawForeground because the latter only exists from
     * API 23, so the outlines would never appear on older devices.
     */
    override fun onDraw(canvas: Canvas) {
        super.onDraw(canvas)
        boxes.forEach { canvas.drawRect(it, outlinePaint) }
    }

    /** Adds a box to outline; a copy is stored so later changes to [box] have no effect. */
    fun addBox(box: Rect) {
        boxes.add(Rect(box))
    }

    /** Removes every box. */
    fun clearBoxes() {
        boxes.clear()
    }
}
activity_main.xml
<?xml version="1.0" encoding="utf-8"?>
<!-- Answer layout: full-size camera preview, an optional inset image for debugging the
     analyzer frame, and the word fence overlay that outlines the recognized words.
     The android/app/tools namespaces must be declared for the prefixed attributes below. -->
<layout xmlns:android="http://schemas.android.com/apk/res/android"
    xmlns:app="http://schemas.android.com/apk/res-auto"
    xmlns:tools="http://schemas.android.com/tools">

    <androidx.constraintlayout.widget.ConstraintLayout
        android:id="@+id/root"
        android:layout_width="match_parent"
        android:layout_height="match_parent"
        android:background="@android:color/darker_gray">

        <!-- Camera preview; 0dp on both axes plus the four parent constraints makes it fill the parent. -->
        <androidx.camera.view.PreviewView
            android:id="@+id/viewFinder"
            android:layout_width="0dp"
            android:layout_height="0dp"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintEnd_toEndOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            app:layout_constraintTop_toTopOf="parent" />

        <!-- Hidden by default; when made visible the activity shows the raw analyzer frame here. -->
        <ImageView
            android:id="@+id/insetView"
            android:layout_width="wrap_content"
            android:layout_height="wrap_content"
            android:visibility="invisible"
            app:layout_constraintBottom_toBottomOf="parent"
            app:layout_constraintStart_toStartOf="parent"
            tools:srcCompat="@tools:sample/backgrounds/scenic" />

        <!-- Word fence: only words entirely inside this view are outlined. -->
        <com.example.textrecognition.BoxedWordView
            android:id="@+id/wordFence"
            android:layout_width="0dp"
            android:layout_height="250dp"
            android:layout_margin="16dp"
            android:background="@drawable/background_drawable"
            app:layout_constraintBottom_toBottomOf="@id/viewFinder"
            app:layout_constraintEnd_toEndOf="@id/viewFinder"
            app:layout_constraintStart_toStartOf="@id/viewFinder"
            app:layout_constraintTop_toTopOf="@id/viewFinder" />
    </androidx.constraintlayout.widget.ConstraintLayout>
</layout>
需要说明的是,从“最近任务”列表返回时,该应用有时会冻结。这个问题可能是我引入的,请留意。
推荐阅读
- android - Recyclerview 保持与 listview 相同的位置
- python - 在 Pandas 中拆分元组系列
- grails - Grails如何将数据保存在具有belongsTo关联的表中?
- url - 使用正则表达式的 .cfm 文件的 URL 重定向规则
- php - 从 HTML 获取内部嵌套列表
- vim - 映射在 Lexplore 中不起作用
- java - Java 8 DateTimeFormatter 解析可选部分
- git - 我可以在父目录中同时使用 Git 和 Mercurial - Git,在子目录中使用 Mercurial hg?
- laravel - 没有机器人的 Laravel 会话
- python - 我可以在 model_fn、Estimator、Tensorflow 中使用 python for 循环吗?