python - 检测图像上的对象边界太慢
问题描述
我有一个label_image
, 作为一个看起来像这样的数据框。请注意,一些对象,那些标记为1
并被3
其他对象完全吞没/包围的对象,没有背景将对象彼此分开。在label_image
这里作为输入给出。我自己没有制作它,因为我没有实际图像(如 tif、jpg 等)
dummy_img = pd.DataFrame(np.array([
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
]))
我想得到物体的轮廓。
我最初写的是这样的:
import pandas as pd
import numpy as np
from scipy.sparse import coo_matrix
from scipy.ndimage import binary_erosion
def outline(label_image):
res_list = []
mask = binary_erosion(label_image.values)
label_image[mask] = 0
c = coo_matrix(label_image)
if c.data.size > 0:
df = pd.DataFrame({'coords': list(zip(c.col, c.row)), 'label': c.data})
df = df.groupby('label')['coords'].apply(lambda group_series: group_series.tolist()).reset_index()
df = df.astype({"label": int})
else:
df = pd.DataFrame()
return df
这给出了一个输出:
label coords
2 [(3, 3), (4, 3), (5, 3), (6, 3), (7, 3), (8, 3...
4 [(12, 3), (13, 3), (14, 3), (15, 3), (16, 3), ...
这是错误的。它不仅错过了内部对象,而且检测到的对象的派生坐标也是错误的。例如label 4
,它返回之间的轮廓labels 4 and 0
并忽略之间的轮廓labels 4 and 3
。
我做了这个修复
def outline_fix(label_image):
res_list = []
coo = coo_matrix(label_image)
labels = np.unique(coo.data)
for label in sorted(set(labels)):
#print('label: %d' % label)
c = coo.copy()
c.data[c.data != label] = 0
c = c.toarray()
mask = binary_erosion(c)
c[mask] = 0
c = coo_matrix(c)
if c.data.size > 0:
df = pd.DataFrame({'coords': list(zip(c.col, c.row)), 'label': c.data})
df = df.groupby('label')['coords'].apply(lambda group_series: group_series.tolist()).reset_index()
df = df.astype({"label": int})
else:
df = pd.DataFrame()
res_list.append(df)
if res_list:
out = pd.concat(res_list).astype({"label": int})
else:
out = pd.DataFrame()
return out
返回:
label coords
1 [(5, 6), (6, 6), (7, 6), (8, 6), (9, 6), (5, 7...
2 [(3, 3), (4, 3), (5, 3), (6, 3), (7, 3), (8, 3...
3 [(12, 5), (13, 5), (14, 5), (10, 6), (11, 6), ...
4 [(12, 3), (13, 3), (14, 3), (15, 3), (16, 3), ...
效果很好。不是 100% 完美,例如,因为label 4
它错过了两对坐标。坐标列表应该有 length=32 而不是我得到的 30,但这是我可以忍受的,它不是很重要。
修正后的功能的问题在于它非常缓慢。在实际情况下,我有一个2000-by-2000
包含多个2800
对象的数组。并非所有对象都是“嵌套的”,但它可能很多,取决于传入的图像。
代码在大约 4 分钟内完成(对于真实的真实案例),这太长了。请问有没有其他选择,或者有什么想法可以加快速度?
非常感谢任何帮助
解决方案
.apply
根据我的经验,当你必须使用函数时,pandas 非常慢。出于这个原因,我更喜欢对其进行操作.values
然后重新分配。话虽如此,熊猫可以制作得非常快,但它本身就是一门艺术。检查此代码,对于大图像,它肯定会比 pandas 实现更快:
import numpy as np
import pandas as pd
from collections import Counter
dummy_img = pd.DataFrame(np.array([
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,1,1,1,1,1,3,3,3,3,3,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,3,3,3,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,2,2,2,2,2,2,2,2,2,4,4,4,4,4,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],
[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
]))
# Array of boundary points
# row - label, col - point coordinates
img_np = dummy_img.values
b_pts = [[] for i in range(4)]
# idea is simple - run a 3x3 window and check whenever the mean value
# inside the window differs from element at the center.
img_padded = np.pad(img_np, ((1, 1), (1, 1)))
for r, row in enumerate(img_np):
for c, elem in enumerate(row):
if Flase in (img_padded[r:r+3, c:c+3] == elem):
b_pts[elem-1].append((r, c))
for elem in b_pts:
print(elem)
推荐阅读
- java - 取链表Java的平均值
- database - 如何使用 Flutter sqflite 查询 SQL Lite 数据库
- r - 给定特定数据帧,如何减去矩阵中的两个值?
- python - 仅当另一列的值满足条件时才获取列的分位数
- haskell - 如何使用数据列表比较和返回数据
- spring-boot - RepositoryRestResource 资源运行一段时间后开始返回 404 错误
- ruby-on-rails - Rails 6设计参数fname,lname被传递给服务器,但没有被插入int数据库
- r - R中的分层k-fold交叉验证
- oauth - Oauth2.0 授权码授予 ClientId & Secret 混淆
- python - Numpy:使用花式索引在 2d 中插入 2 x 1d 的值