sql - 如何使用 Hive 解析多个嵌套的 JSON 数组
问题描述
{
"base": {
"code": "xm",
"name": "project"
},
"list": [{
"ACode": "cp1",
"AName": "Product1",
"BList": [{
"BCode": "gn1",
"BName": "Feature1"
}, {
"BCode": "gn2",
"BName": "Feature2"
}]
}, {
"ACode": "cp2",
"AName": "Product2",
"BList": [{
"BCode": "gn1",
"BName": "Feature1"
}]
}]
}
像这样的JSON,想要得到这个
| code | name | ACode | Aname | Bcode | Bname |
| ---- | ------- | ----- | -------- | ----- | -------- |
| xm | project | cp1 | Product1 | gn1 | Feature1 |
| xm | project | cp1 | Product1 | gn2 | Feature2 |
| xm | project | cp2 | Product2 | gn1 | Feature1 |
我尝试使用这个
SELECT
code
, name
, get_json_object(t.list, '$.[*].ACode') AS ACode
, get_json_object(t.list, '$.[*].AName') AS AName
, get_json_object(t.list, '$.[*].BList[*].BCode') AS BCode
, get_json_object(t.list, '$.[*].BList[*].BName') AS BName
FROM
(
SELECT
get_json_object(t.value, '$.base.code') AS code
, get_json_object(t.value, '$.base.name') AS name
, get_json_object(t.value, '$.list') AS list
FROM
(
SELECT
'{"base":{"code":"xm","name":"project"},"list":[{"ACode":"cp1","AName":"Product1","BList":[{"BCode":"gn1","BName":"Feature1"},{"BCode":"gn2","BName":"Feature2"}]},{"ACode":"cp2","AName":"Product2","BList":[{"BCode":"gn1","BName":"Feature1"}]}]}' as value
)
t
)
t
;
得到这个
xm project ["cp1","cp2"] ["Product1","Product2"] ["gn1","gn2","gn1"] ["Feature1","Feature2","Feature1"]
但我发现它会生成六行。似乎有一个笛卡尔积。我尝试使用 split(string, "\},\{"),但这会同时拆分内层。所以我希望能得到帮助。
解决方案
我解决!!
SELECT
code
, name
, ai.ACode
, ai.AName
, p.BCode
, p.BName
FROM
(
SELECT
get_json_object(t.value, '$.base.code') AS code
, get_json_object(t.value, '$.base.name') AS name
, get_json_object(t.value, '$.list') AS list
FROM
(
SELECT
'{"base":{"code":"xm","name":"project"},"list":[{"ACode":"cp1","AName":"Product1","BList":[{"BCode":"gn1","BName":"Feature1"},{"BCode":"gn2","BName":"Feature2"}]},{"ACode":"cp2","AName":"Product2","BList":[{"BCode":"gn1","BName":"Feature1"}]}]}' as value
)
t
)
t
lateral view explode(split(regexp_replace(regexp_extract(list,'^\\[(.+)\\]$',1),'\\}\\]\\}\\,\\{', '\\}\\]\\}\\|\\|\\{'),'\\|\\|')) list as a
lateral view json_tuple(a,'ACode','AName','BList') ai as ACode
, AName
, BList
lateral view explode(split(regexp_replace(regexp_extract(BList,'^\\[(.+)\\]$',1),'\\}\\,\\{', '\\}\\|\\|\\{'),'\\|\\|')) BList as b
lateral view json_tuple(b,'BCode','BName') p as BCode
, BName
推荐阅读
- c# - Specflow - 无法在功能文件之间共享步骤
- boost - boost::iostreams::gzip 压缩器 - crc 错误?
- cassandra - 时间排序数据属性的 Cassandra IN 条件
- c# - 将 svg base64 字符串转换为图像 - C#
- css - 使用 CSS Flexbox 构建堆叠布局
- php - 如何单独打印日期,如日、月或年?
- javascript - 构造函数之外的 ES6 解构赋值
- javascript - Youtube 实时聊天消息 Javascript API 响应未与 youtube 网站和应用程序上可见的实际聊天同步
- arm - ARM进入向量表时是使用物理地址还是虚拟地址?
- html - 试图从 POST 请求中检索数据