python - Python将文本文件解析为字典
问题描述
我正在使用 Python 3.6 并假设我的数据如下所示
# Sample quiz text to parse: a few header lines, then numbered questions, each
# followed by lettered options and a "correct:" answer key, with unrelated
# lines in between.
# NOTE(review): the first question's answer key is "c" although only options
# a and b are listed for it.
data = """
Mod 123
Trainee Name: John Doe
Date: 11-11-1111
MULTIPLE CHOICE. Choose the one alternative that best completes the statement or answers the question.
1. 2 + 2 = 4?
a. True
b. False
correct: c
ignore this line
and this one too
2. US Population is _____ Million.
a. 271
b. 292
c. 319
d. 328
correct: d"""
我想要得到一个如下所示的字典列表
[
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {'Option_1': 'True', 'Option_2': 'False'}, 'Correct': "Option_1"},
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {'Option_1': '271', 'Option_2': '292', 'Option_3': '319', 'Option_4': '328'}, 'Correct': "Option_4"}
]
需要注意的是,选项最多可以有 5 个(a、b、c、d、e),并且文本中夹杂着一些无用的多余行,需要将其忽略。
我的代码如下
# Accumulators: dt is the record of the current question, lt collects the
# stored records, dc holds the answer options of the current question.
# NOTE(review): lc is never used in the code shown here.
dt = {}
lt = []
dc = {}
lc = []
# Process the text one line at a time.
d = data.split('\n')
for x in range(len(d)):
line = d[x]
# Peek at the following line, when there is one.
if x < len(d) -1:
nl = d[x+1]
try:
# A line starting with one or two digits followed by "." is treated as a
# question header.
if (nl[0].isdigit() and nl[1] == ".") or (nl[0].isdigit() and nl[1].isdigit() and nl[2] == "."):
next_line = "Question"
except:
# Any error in the check above (e.g. indexing a line that is too short)
# clears the flag.
# NOTE(review): the flag is only cleared here, not when the check is simply
# false, so it can keep the value "Question" from an earlier iteration.
next_line = ""
try:
# Question header: the part before the first ". " is the sequence number,
# the segment after it (up to any further ". ") is the question text.
if (line[0].isdigit() and line[1] == ".") or (line[0].isdigit() and line[1].isdigit() and line[2] == ".") :
Question = line.split(". ")[1]
Sequence_Number = line.split(". ")[0]
# Option lines "a. " through "e. " are stored as Option_1 through Option_5.
if line.startswith('a. ') :
dc['Option_1'] = line.split(". ")[1]
if next_line != "Question":
continue
if line.startswith('b. '):
dc['Option_2'] = line.split(". ")[1]
if next_line != "Question":
continue
if line.startswith('c. ') :
dc['Option_3'] = line.split(". ")[1]
if next_line != "Question":
continue
if line.startswith('d. '):
dc['Option_4'] = line.split(". ")[1]
if next_line != "Question":
continue
if line.startswith('e. '):
dc['Option_5'] = line.split(". ")[1]
if next_line != "Question":
continue
# When the flag says a question follows, store the collected record and
# start over with fresh dicts.
if next_line == "Question" :
dt['Question'] = Question
dt['Sequence_Number'] = Sequence_Number
dt["Answers"] = dc
lt.append(dt)
dt = {}
dc = {}
except:
# Bare except: any error while handling this line moves on to the next line.
continue
# Print every stored record.
for i in lt:
print(i)
结果看起来像
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {}}
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {}}
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {'Option_1': 'True'}}
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {'Option_2': 'False'}}
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {}}
{'Question': '2 + 2 = 4', 'Sequence_Number': '1', 'Answers': {}}
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {}}
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {'Option_1': '271'}}
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {'Option_2': '292'}}
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {'Option_3': '319'}}
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {'Option_4': '328'}}
结果并不干净,我似乎无法避免把空字典插入到列表中。问题一定很简单,但我就是看不出来。
谁能帮帮忙?
解决方案
这是一个使用一些正则表达式来解析数据并将结果存储在字典列表中的解决方案:
import re
data = """
Mod 123
Trainee Name: John Doe
Date: 11-11-1111
MULTIPLE CHOICE. Choose the one alternative that best completes the statement or answers the question.
1. 2 + 2 = 4?
a. True
b. False
correct: a
ignore this line
and this one too
2. US Population is _____ Million.
a. 271
b. 292
c. 319
d. 328
correct: d"""

# Letters accepted as option labels and answer keys, in option order.
OP = 'abcde'

# One quiz item: a numbered question line, one to five lettered option lines,
# then the answer-key line. Anchored at line starts so a digit in the middle
# of an unrelated line cannot begin a match; `\d+` keeps multi-digit question
# numbers intact.
_QUESTION_RE = re.compile(
    r'^[ \t]*(\d+)\. (.*)\n'
    r'((?:[ \t]*[a-e]\. .*\n){1,5})'
    r'[ \t]*correct: (.*)',
    re.MULTILINE,
)
# A single option line inside the captured option block.
_OPTION_RE = re.compile(r'^[ \t]*[a-e]\. (.*)$', re.MULTILINE)


def parse_questions(text):
    """Extract the multiple-choice questions contained in *text*.

    Lines that do not belong to a question/options/answer-key group are
    ignored.

    Args:
        text: Raw quiz text, one item per line.

    Returns:
        A list with one dict per question, holding the keys 'Question',
        'Sequence_Number', 'Answers' (mapping 'Option_1'..'Option_5' to the
        option texts) and 'Correct' (the 'Option_N' name of the answer key,
        or None when the key is not a single letter from OP).
    """
    parsed = []
    for sequence_number, question, options, correct in _QUESTION_RE.findall(text):
        answers = {
            f'Option_{idx}': opt.strip()
            for idx, opt in enumerate(_OPTION_RE.findall(options), start=1)
        }
        key = correct.strip().lower()
        # str.index/find search for substrings, so an empty or multi-letter
        # key would otherwise be reported as position 0; accept exactly one
        # letter only.
        position = OP.find(key) if len(key) == 1 else -1
        parsed.append({
            'Question': question.strip(),
            'Sequence_Number': sequence_number.strip(),
            'Answers': answers,
            'Correct': f'Option_{position + 1}' if position >= 0 else None,
        })
    return parsed


rv = parse_questions(data)
print(rv)
输出:
[{'Question': '2 + 2 = 4?', 'Sequence_Number': '1', 'Answers': {'Option_1': 'True', 'Option_2': 'False'}, 'Correct': 'Option_1'},
{'Question': 'US Population is _____ Million.', 'Sequence_Number': '2', 'Answers': {'Option_1': '271', 'Option_2': '292', 'Option_3': '319', 'Option_4': '328'}, 'Correct': 'Option_4'}]
推荐阅读
- mysql - 创建视图但出现错误“视图 SELECT 在 FROM 子句中包含子查询”
- ruby-on-rails - 如何将 id 添加到 Ruby on Rails 的选项中
- php - 无法与主机 smtp.domain.com 建立连接:stream_socket_client():
- python - Tinye MCE 和 Django:HTMLField 工具栏选项(管理员)
- ios - 在 iOS 中放大时 react-native-maps 覆盖图像消失
- r - 有没有办法显示组中的计数和平均值?
- javascript - 如何将扑克牌值链接到 React Native 中的图像?
- google-forms - 如何访问当前提交的 Google 表单调查?
- aem - AEM Query-builder 批量查询
- java - 如何在所有情况下只装箱一次