python - 如何用正则表达式匹配数据
问题描述
我有一个列表，例如：
data = ['- TEST BEGA','R8=11K(10,15A)B','R9=1K(0,3A)B','R10_R84=13MEG(7,14K)R','R85_R84<100K(970,1000K)R',
'R85_R86=10K(9,11K)R']
我想把这个列表拆分成如下形式：
SCN: TEST BEGA
STEP R8
CHILD R8
Operator =
MEASURE_CHILD 11K(10,15A)B
STEP R9
CHILD R9
Operator =
MEASURE_CHILD 1K(0,3A)B
STEP R10_R84
CHILD R10_R84
Operator =
MEASURE_CHILD 13MEG(7,14K)R
STEP R85
CHILD R84
Operator <
MEASURE_CHILD 100K(970,1000K)R
CHILD R86
Operator =
MEASURE_CHILD 10K(9,11K)R
我使用这段代码来做这些事情,但我不知道出了什么问题:
def createTreeStandardBloc(self):
    """Print the SCN / STEP / CHILD tree for a hard-coded measurement list.

    The first entry is the scenario title. Every following non-blank entry
    is parsed as ``STEP[_CHILD]<op>MEASURE`` where ``<op>`` is one of
    ``<``, ``>`` or ``=``. A ``STEP`` header is printed only when the step
    prefix differs from the one on the previous entry.

    Known limitation (this is what the question is about): the name is
    always split at its first underscore, so ``R10_R84`` is reported as
    step ``R10`` with child ``_R84`` even though ``R10`` is not repeated.

    ``self`` is not used. Nothing is returned; the result is printed.
    """
    # Same sample data as shown at the top of the question.
    data = ['- TEST BEGA', 'R8=11K(10,15A)B', 'R9=1K(0,3A)B',
            'R10_R84=13MEG(7,14K)R', 'R85_R84<100K(970,1000K)R',
            'R85_R86=10K(9,11K)R']
    # Raw string so that \s and \w reach the regex engine untouched.
    # Groups: step prefix, optional "_child" suffix, operator, measure.
    pattern = re.compile(r"^\s*([^_]+)(_\w+)?([<>=])(.*)\s*$")
    last_s = None
    for i, line in enumerate(data):
        if i == 0:
            # The title line carries a leading "- " marker.
            print("SCN:", line.strip("- "))
        elif line.strip():
            s, c, op, mc = pattern.match(line).groups()
            if s != last_s:
                print("STEP", s)
            # Without a "_child" suffix the step is its own child.
            print("CHILD", c or s)
            print("Operator", op)
            print("MEASURE_CHILD", mc)
            last_s = s
问题是：数据 R10_R84 被拆分成了 STEP R10 和 CHILD R84。我希望只有当前缀像 R85 那样重复出现时，数据才会被拆分。
解决方案
我相信其他人能给出更好的解决方案，不过我的做法如下。
from collections import defaultdict
def get_operator(string):
    """Return the first comparison operator that occurs in ``string``.

    The string is scanned from left to right and the first character that
    is one of ``=``, ``>`` or ``<`` is returned. Scanning by position
    (rather than testing each operator in a fixed order) makes sure the
    operator that actually separates the name from the measure is chosen,
    even if another operator character appears later in the line.

    Returns ``None`` when the string contains none of the operators.
    """
    operators = '=><'
    for char in string:
        if char in operators:
            return char
    return None
def createTreeStandardBloc(data):
    """Format measurement lines as a list of SCN / STEP / CHILD strings.

    ``data[0]`` is the scenario title (its leading ``- `` marker is
    stripped). Every following non-blank entry must look like
    ``STEP[_CHILD]<op>MEASURE`` where ``<op>`` is whatever
    ``get_operator`` finds in the line.

    Entries are grouped by the part of the name before the first
    underscore. A group with a single entry whose child differs from the
    step is not really a parent/child pair, so its full name (for example
    ``R10_R84``) is used as both step and child.

    Returns a list of strings: the ``SCN: ...`` title, then for each step a
    ``' '`` separator, a ``STEP ...`` line and ``CHILD`` / ``Operator`` /
    ``MEASURE_CHILD`` lines for each of its entries. Steps appear in the
    order they were first seen (relies on dicts keeping insertion order).
    An empty ``data`` yields an empty list.

    Raises ``ValueError`` if a non-blank entry contains no operator.
    """
    if not data:
        return []

    # step prefix -> list of (child, operator, measure) in input order
    steps = defaultdict(list)
    for line in data[1:]:
        if not line.strip():
            # Blank entries carry no measurement; ignore them.
            continue
        oper = get_operator(line)
        if oper is None:
            raise ValueError(
                'no operator (=, > or <) found in {!r}'.format(line))
        # partition() cuts at the first occurrence only, so an operator
        # character inside the measure text is left intact.
        name, _, measure = line.partition(oper)
        # Likewise only the first underscore separates step from child.
        step, _, child = name.partition('_')
        steps[step].append((child or step, oper, measure))

    formatted = ['SCN: ' + data[0].strip('- ')]
    for step, children in steps.items():
        formatted.append(' ')
        if len(children) == 1 and children[0][0] != step:
            # The prefix was never repeated: put the name back together.
            child, oper, measure = children[0]
            step = step + '_' + child
            children = [(step, oper, measure)]
        formatted.append('STEP ' + step)
        for child, oper, measure in children:
            formatted.append('CHILD ' + child)
            formatted.append('Operator ' + oper)
            formatted.append('MEASURE_CHILD ' + measure)
    return formatted
# Sample input: the scenario title followed by the measurement lines.
data = [
    '- TEST BEGA',
    'R8=11K(10,15A)B',
    'R9=1K(0,3A)B',
    'R10_R84=13MEG(7,14K)R',
    'R85_R84<100K(970,1000K)R',
    'R85_R86=10K(9,11K)R',
]

# Build the formatted tree and print it one entry per line.
new_data = createTreeStandardBloc(data)
for line in new_data:
    print(line)
推荐阅读
- mysql - 在定义 ON DELETE RESTRICT 时,是否有任何方法可以在 adhoc 基础上使用查询来使用 ON DELETE CASCADE 功能?
- reactjs - 反应功能无法识别
- symfony - Symfony Form + EntityType Field + oTm > mTo < oTm 关系 = 保存时类型错误
- reactjs - 使用 simpleWebRTC 进行存储设置
- usb - C# WPF 在外部 USB 相机按钮上捕获图像
- dart - 使用 Dartson 进行 Json 序列化
- java - 如何在同一个类中创建两个不同的构造函数 - Android
- python - 调试/逐步了解导入模块的工作
- telegram - 电报聊天ID可以做什么?
- java - 为什么我的程序不读取使用同一程序创建的文件?