python - 将逻辑字符串转换为 JSON
问题描述
我想从以下转换:
输入:
"#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
其中:'#' 表示意图(intent),'@' 表示实体(entity),':' 后面是实体的值(value)。
输出:
{"and":[
{"some" : [ {"var":"intents"}, {"==":[{"var":"intent"},
"serviceRequest"]} ]},
{"or":[
{"and":[{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"},
"charges"]} ]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"},
"getRoamingCharges"]} ]}]
},{"and":[
{"some" : [ {"var":"entities"}, {"==":[{"var":"entity"}, "plans"]}
]},
{"some" : [ {"var":"entities"}, {"==":[{"var":"value"},
"dataplans"]} ]}
]}
]}
]}
我试过的:
import pyparsing
# Both operands of a comparison are written as double-quoted strings.
identifier = pyparsing.QuotedString('"')
value = pyparsing.QuotedString('"')
# Comparison operators, tried in the order listed; the first one that
# matches at the current position wins.
operator = pyparsing.MatchFirst([
    pyparsing.Literal(symbol)
    for symbol in ("==", "≠", "≥", "≤", "<", ">")
])
# A complete comparison: quoted operand, operator, quoted operand.
match_format = identifier + operator + value
#print(match_format.parseString('"foobar"=="123"'))
def list_to_dict(pos, tokens):
    """Print the matched tokens and a dict built from them.

    Written to be used as a parse action. ``tokens`` is indexed as
    ``[left, operator, right]``; ``pos`` is not used.

    Returns:
        ``tokens`` itself, unchanged. The dict that is built here is only
        printed, never returned.
    """
    print(tokens)
    left, op, right = tokens[0], tokens[1], tokens[2]
    # The operands are put into a set, so their order is not preserved.
    comparison = {op: {right, left}}
    print(comparison)
    # Assembled as in the original attempt, but never used afterwards.
    wrapped = {'bfeh': [comparison]}
    return tokens
# Rebuild the comparison expression, register list_to_dict as its parse
# action, then parse one sample comparison and print the result.
match_format = identifier + operator + value
match_format.setParseAction(list_to_dict)
print(match_format.parseString('"intent"=="serviceRequest"'))
给出:
{'==': {'intent', 'serviceRequest'}}
请问如何用 pyparsing(Python)或其他任何可行的替代方法来实现这种转换?
解决方案
您可以先编写一个简单的分词器(tokenizer),再把它的输出交给解析器处理:
import re
class Token:
    """One lexical token of the intent/entity expression language.

    A token stores its matched text in ``val`` and its category in
    ``_type``: ``'cond'`` for ``and``/``or``, ``'intent'`` for ``#``,
    ``'entity'`` for ``@``, ``'value'`` for ``:`` and ``'label'`` for any
    other run of word characters.
    """

    # Regex that splits the input into raw tokens. ``and``/``or`` are only
    # recognised as whole words, so labels such as ``order`` or ``android``
    # are kept in one piece.
    grammar = r'\b(?:and|or)\b|#|:|@|\w+'
    # (pattern, type) pairs tried in order; the first pattern that matches
    # the whole token text decides its type.
    _types = [('and', 'cond'), ('or', 'cond'), ('#', 'intent'), ('@', 'entity'), (':', 'value'), (r'\w+', 'label')]

    def __init__(self, val, _type):
        self.val, self._type = val, _type

    @property
    def is_cond(self):
        """True for the boolean connectives ``and`` / ``or``."""
        return self._type == 'cond'

    @property
    def is_desc(self):
        """True for the marker tokens ``#``, ``@`` and ``:``."""
        return self._type in {'intent', 'entity', 'value'}

    @property
    def var_name(self):
        """Collection name for this token: ``'intents'`` or ``'entities'``."""
        return f'{self._type}s' if self._type == 'intent' else 'entities'

    @classmethod
    def _classify(cls, text):
        """Return the type of ``text`` according to ``_types``.

        The whole text must match a pattern; a substring search would
        misread a label like ``brand`` as the connective ``and``.

        Raises:
            ValueError: if no pattern in ``_types`` matches ``text``.
        """
        for pattern, kind in cls._types:
            if re.fullmatch(pattern, text):
                return kind
        raise ValueError(f'unrecognised token: {text!r}')

    @classmethod
    def tokenize(cls, _input):
        """Split ``_input`` into a list of ``Token`` objects.

        Characters that match no alternative of ``grammar`` (for example
        whitespace) are skipped.
        """
        return [cls(text, cls._classify(text)) for text in re.findall(cls.grammar, _input)]

    def __repr__(self):
        return f'{self.__class__.__name__}(value={self.val}, type={self._type})'
现在,可以创建一个简单的解析器:
from itertools import groupby
class AST:
    """Turn grouped tokens into a nested dict of and/or/some/==/var nodes.

    The stream holds ``(is_cond, tokens)`` pairs as produced by ``_group``:
    runs of connective tokens (``is_cond`` true) alternating with runs of
    operand tokens (``is_cond`` false).
    """

    def __init__(self, stream):
        self.stream = iter(stream)

    @staticmethod
    def _clause(marker, label):
        """Build the ``[var, ==]`` pair placed under one ``'some'`` key.

        ``marker`` is the ``#`` / ``@`` token that introduced ``label``, or
        None when the label follows a ``:`` and is an entity value.
        """
        if marker is None:
            return [{'var': 'entities'}, {'==': [{'var': 'value'}, label]}]
        return [{'var': marker.var_name}, {'==': [{'var': marker._type}, label]}]

    def p_parse(self, stream):
        """Convert one operand (a run of non-connective tokens) to a dict.

        Returns ``{'some': clause}`` when the operand yields one clause,
        otherwise ``{'and': [...]}`` over the first two clauses, as for
        ``@entity:value``. Any clause after the second is ignored.
        """
        clauses, marker, label = [], None, ''
        for tok in stream:
            if tok._type == 'value':
                # A ':' closes the label collected so far.
                if label:
                    clauses.append(self._clause(marker, label))
                    marker, label = None, ''
            elif tok.is_desc:
                marker = tok
            else:
                label = tok.val
        # Close the trailing label. The collection name must come from the
        # marker token, not from the last token seen: the last token is the
        # label itself, which would always give 'entities', even for intents.
        clauses.append(self._clause(marker, label))
        if len(clauses) == 1:
            return {'some': clauses[0]}
        return {'and': [{'some': clauses[0]}, {'some': clauses[1]}]}

    def parse(self, seen=None):
        """Consume the rest of the stream and return the resulting dict.

        ``seen`` is the operand parsed so far. Connectives nest to the right
        with no precedence: ``a and b or c`` gives
        ``{'and': [a, {'or': [b, c]}]}``. Returns ``seen`` (None by default)
        once the stream is exhausted.
        """
        is_cond, group = next(self.stream, (None, None))
        if is_cond is None:
            return seen
        if is_cond:
            # Connective: left side is what was seen, right side is the rest.
            return {group[0].val: [seen, self.parse()]}
        return self.parse(self.p_parse(group))

    @classmethod
    def _group(cls, _tokens):
        """Build an ``AST`` from a flat token list.

        Consecutive tokens with the same ``is_cond`` flag are grouped.
        """
        return cls([(flag, list(run)) for flag, run in groupby(_tokens, key=lambda x: x.is_cond)])
现在,组合组件:
# Sample expression: tokenize it, group the tokens into operand and
# connective runs, then build the nested dict.
s = "#serviceRequest and @charges:getRoamingCharges or @plans:dataplans"
result = AST._group(
    Token.tokenize(s),
).parse()
输出:
{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'intent'}, 'serviceRequest']}]}, {'or': [{'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'charges']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'getRoamingCharges']}]}]}, {'and': [{'some': [{'var': 'entities'}, {'==': [{'var': 'entity'}, 'plans']}]}, {'some': [{'var': 'entities'}, {'==': [{'var': 'value'}, 'dataplans']}]}]}]}]}
这个问题无疑还有更简短的解法;不过,采用“分词器 + 解析器”的目的,是让您将来更容易扩展该方案,以处理那些更“取巧”(hackish)的解法无法应对的输入。
推荐阅读
- forms - TYPO3 8.7.17 EXT:form - 使用 DB 寄存器将选择添加到自定义修整器
- https - 用于 HTTPS 的气流 SimpleHttpOperator
- c++ - 模板类变量作为非模板类的成员
- jenkins - 基于groovy的Jenkins Pipeline:无法推送到git:权限被拒绝(公钥)
- javascript - 在表格中,将属性添加到列中的单元格,其中包括包含特定文本的单元格
- java - Java8中带有流的嵌套列表
- python - 我试图打印出 1!到10!但是遇到了runtimeError
- azure - 授权错误 azure 容器服务
- firebase - firestore 安全规则是否允许我更改请求数据或已经存在的数据?
- php - Laravel 弹出框包含链接悬停时使用 AJAX 的动态数据