sql - 使用带有破折号的数据源名称查询 Druid SQL 内部联接
问题描述
如何在两个数据源之间编写一个 INNER JOIN 查询,其中一个数据源的模式名称为破折号
对 Druid SQL 二进制文件执行以下查询会导致查询错误
SELECT *
FROM first
INNER JOIN "second-schema" on first.device_id = "second-schema".device_id;
org.apache.druid.java.util.common.ISE:无法构建查询计划
尝试引用名称中包含破折号的数据源时,这是正确的语法吗?
架构
[
{
"dataSchema": {
"dataSource": "second-schema",
"parser": {
"type": "string",
"parseSpec": {
"format": "json",
"timestampSpec": {
"column": "ts_start"
},
"dimensionsSpec": {
"dimensions": [
"etid",
"device_id",
"device_name",
"x_1",
"x_2",
"x_3",
"vlan",
"s_x",
"d_x",
"d_p",
"msg_type"
],
"dimensionExclusions": [],
"spatialDimensions": []
}
}
},
"metricsSpec": [
{ "type": "hyperUnique", "name": "conn_id_hll", "fieldName": "conn_id"},
{
"type": "count",
"name": "event_count"
}
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "HOUR",
"queryGranularity": "minute"
}
},
"ioConfig": {
"type": "realtime",
"firehose": {
"type": "kafka-0.8",
"consumerProps": {
"zookeeper.connect": "localhost:2181",
"zookeeper.connectiontimeout.ms": "15000",
"zookeeper.sessiontimeout.ms": "15000",
"zookeeper.synctime.ms": "5000",
"group.id": "flow-info",
"fetch.size": "1048586",
"autooffset.reset": "largest",
"autocommit.enable": "false"
},
"feed": "flow-info"
},
"plumber": {
"type": "realtime"
}
},
"tuningConfig": {
"type": "realtime",
"maxRowsInMemory": 50000,
"basePersistDirectory": "\/opt\/druid-data\/realtime\/basePersist",
"intermediatePersistPeriod": "PT10m",
"windowPeriod": "PT15m",
"rejectionPolicy": {
"type": "serverTime"
}
}
},
{
"dataSchema": {
"dataSource": "first",
"parser": {
"type": "string",
"parseSpec": {
"format": "json",
"timestampSpec": {
"column": "ts_start"
},
"dimensionsSpec": {
"dimensions": [
"etid",
"category",
"device_id",
"device_name",
"severity",
"x_2",
"x_3",
"x_4",
"x_5",
"vlan",
"s_x",
"d_x",
"s_i",
"d_i",
"d_p",
"id"
],
"dimensionExclusions": [],
"spatialDimensions": []
}
}
},
"metricsSpec": [
{ "type": "doubleSum", "name": "val_num", "fieldName": "val_num" },
{ "type": "doubleMin", "name": "val_num_min", "fieldName": "val_num" },
{ "type": "doubleMax", "name": "val_num_max", "fieldName": "val_num" },
{ "type": "doubleSum", "name": "size", "fieldName": "size" },
{ "type": "doubleMin", "name": "size_min", "fieldName": "size" },
{ "type": "doubleMax", "name": "size_max", "fieldName": "size" },
{ "type": "count", "name": "first_count" }
],
"granularitySpec": {
"type": "uniform",
"segmentGranularity": "HOUR",
"queryGranularity": "minute"
}
},
"ioConfig": {
"type": "realtime",
"firehose": {
"type": "kafka-0.8",
"consumerProps": {
"zookeeper.connect": "localhost:2181",
"zookeeper.connectiontimeout.ms": "15000",
"zookeeper.sessiontimeout.ms": "15000",
"zookeeper.synctime.ms": "5000",
"group.id": "first",
"fetch.size": "1048586",
"autooffset.reset": "largest",
"autocommit.enable": "false"
},
"feed": "first"
},
"plumber": {
"type": "realtime"
}
},
"tuningConfig": {
"type": "realtime",
"maxRowsInMemory": 50000,
"basePersistDirectory": "\/opt\/druid-data\/realtime\/basePersist",
"intermediatePersistPeriod": "PT10m",
"windowPeriod": "PT15m",
"rejectionPolicy": {
"type": "serverTime"
}
}
}
]
解决方案
根据您的架构定义,我将进行一些观察。
在进行连接时,您通常必须明确列出列(不使用 *),否则您会从重复列中得到冲突。例如,在您的加入中,您在“第一”和“第二模式”中都有一个 device_id,更不用说在两者中相同的所有其他列。
使用文字分隔符时,我不会混淆它们。我要么使用它们,要么不使用它们。
所以我认为你的查询将以更像这样的形式更好地工作
SELECT
"first"."etid",
"first"."category",
"first"."device_id",
"first"."device_name",
"first"."severity",
"first"."x_2",
"first"."x_3",
"first"."x_4",
"first"."x_5",
"first"."vlan",
"first"."s_x",
"first"."d_x",
"first"."s_i",
"first"."d_i",
"first"."d_p",
"first"."id",
"second-schema"."etid" as "ss_etid",
"second-schema"."device_id" as "ss_device_id",
"second-schema"."device_name" as "ss_device_name",
"second-schema"."x_1" as "ss_x_1",
"second-schema"."x_2" as "ss_x_2",
"second-schema"."x_3" as "ss_x_3",
"second-schema"."vlan" as "ss_vlan",
"second-schema"."s_x" as "ss_s_x",
"second-schema"."d_x" as "ss_d_x",
"second-schema"."d_p" as "ss_d_p",
"second-schema"."msg_type"
FROM "first"
INNER JOIN "second-schema" ON "first"."device_id" = "second-schema"."device_id";
显然,您可以随意命名列,或者根据需要包含排除列。仅当两个表中的所有列都是唯一的时,Select * 才会起作用。
推荐阅读
- objective-c - 在 macOS 11 Big Sur 中禁用了 NSSavePanel 附件视图
- c# - 奇怪的字符串换行的东西
- c - 如何在不区分大小写的情况下比较两个字符串?
- xml - VSCode如何为使用URN xmlns的xml文件启用xml自动完成
- javascript - SAPUI5 使用 QuuniT 测试过滤的问题
- html-email - 在 gmail 中收到的 HTML 邮件,outlook 以删除
标记呈现 - javascript - 遍历数组列表jquery,然后对对象属性进行一些更改。更改后创建一个新的数组列表
- google-app-engine - Google App Engine:调试仪表板 > 流量 > 已发送
- outlook - 如何在 Microsoft Forms 365 中生成和处理内部 ID
- google-apps-script - 试图制作一个脚本来将两个表添加在一起