首页 > 解决方案 > 具有最小长度的序列


位于两个阈值 (73, 82) 之间的值序列,如果其长度达到最小长度 (12),则应被标记。


# Asker's sketch of the desired result: flag the values of the 'preasure'
# column that lie between the two thresholds (73 and 82) ...
# NOTE(review): the right-hand side is a row-filtered DataFrame, not a boolean
# column -- presumably `df['preasure'].between(73, 82)` alone was intended;
# confirm before using.
df['between']  = df[df['preasure'].between(73, 82)]
# ... then mark only the runs of flagged values that are at least 12 long.
# NOTE(review): this two-argument call does not match the four-parameter
# `sequence(x, low, high, thresh)` defined further down; treat it as pseudo-code.
df['sequence'] = sequence(df.between, 12)

标签: python pandas numpy


这是一个 numpy 解决方案,我确信可以将其翻译为 pandas(作为读者练习)。

# Inclusive thresholds and the minimum run length that should be kept.
low = 73
high = 82
min_length = 12

# Boolean mask of the elements of x that lie inside [low, high].
# NOTE: x is the input array (assumed 1-D); it is not defined in this snippet.
mask = ((x >= low) & (x <= high))

# Pad the mask with False on both sides and diff it.  For a boolean array
# np.diff flags every position where the value flips, so each run of True
# contributes exactly two flags: its first index and one past its last index.
swap = np.diff(np.r_[False, mask, False])

# Positions of the flips; they alternate run start, run end, run start, ...
ind = np.flatnonzero(swap)

# Even entries are run starts, odd entries are the (exclusive) run ends.
start = ind[::2]
end = ind[1::2]
lens = end - start

# Keep only the runs that are long enough.
lmask = lens >= min_length

要从索引制作掩码,请使用 np.cumsum。在这里,我们基本上将原始掩码替换为阈值掩码:

# One extra slot so that a run touching the end of x (end == x.size) still has
# a valid place for its -1 marker; the slot is dropped again below.  This
# avoids discarding the last run's end marker unconditionally, which would
# leave the result True up to the end of the array whenever the last kept run
# stops earlier, and would fail outright when there are no runs at all.
result = np.zeros(x.size + 1, dtype=np.int8)
result[start[lmask]] = +1
result[end[lmask]] = -1
# The running sum is 1 inside every kept run and 0 everywhere else.
result = np.cumsum(result, out=result)[:-1].view(bool)



# Worked example with small numbers; the values shown in the comments are one
# illustrative draw (x is random, so an actual run will differ).
low = 3
high = 7
min_length = 3

x = np.random.randint(low - 2, high + 2, 20)
# 6, 5, 6, 1, 1, 2, 4, 7, 8, 1, 2, 8, 5, 2, 5, 3, 4, 2, 6, 3
mask = (x >= low) & (x <= high)
# 1, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1

# Flips of the False-padded mask: one flag at each run start and one just
# past each run end.
swap = np.diff(np.r_[False, mask, False])
# 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1
ind = np.flatnonzero(swap)
# 0,       3,       6,    8,         12,13,14,   17,   18,   20

start = ind[::2]
# 0, 6, 12, 14, 18
end = ind[1::2]
# 3, 8, 13, 17, 20
lens = end - start
# 3, 2, 1, 3, 2
lmask = lens >= min_length
# 1, 0, 0, 1, 0

# One extra trailing slot so that an end index equal to x.size is valid; it
# is sliced off after the cumulative sum.
result = np.zeros(x.size + 1, dtype=np.int8)
# 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
result[start[lmask]] = +1
# 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0
result[end[lmask]] = -1
# 1, 0, 0,-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,-1, 0, 0, 0
result = np.cumsum(result, out=result)[:-1].view(bool)
# 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0


def sequence(x, low, high, thresh):
    """Flag the runs of in-range values that are at least ``thresh`` long.

    Parameters
    ----------
    x : array_like
        One-dimensional sequence of values.
    low, high : scalar
        Inclusive bounds; an element is in range when ``low <= x <= high``.
    thresh : int
        Minimum number of consecutive in-range elements for a run to be kept.

    Returns
    -------
    numpy.ndarray
        Boolean array with one entry per element of ``x``; True for every
        element that belongs to a kept run, False everywhere else.
    """
    x = np.asarray(x)
    in_range = (x >= low) & (x <= high)
    # Diffing the False-padded mask flags each run's first index and the index
    # one past its last, so the flagged positions alternate start, end, ...
    edges = np.flatnonzero(np.diff(np.r_[False, in_range, False]))
    start = edges[::2]
    end = edges[1::2]
    keep = end - start >= thresh
    # One extra slot so that a run reaching the end of x (end == x.size) has a
    # valid place for its -1 marker.  Every kept run gets both markers, so the
    # running sum returns to 0 after each run, and an input with no runs at all
    # is handled without indexing into an empty array.
    result = np.zeros(x.size + 1, dtype=np.int8)
    result[start[keep]] = 1
    result[end[keep]] = -1
    return np.cumsum(result, out=result)[:-1].view(bool)
