scala - 简单的 TableAPI SQL 查询不适用于 Flink 1.10 和 Blink
问题描述
我想使用 TableAPI 定义 Kafka 连接器,并在这样描述的(由 Kafka 支持的)表上运行 SQL。不幸的是,Rowtime 定义似乎没有按预期工作。
这是一个可重现的示例:
// Minimal reproduction: a Kafka-backed table declared via the Table API
// descriptor whose rowtime attribute is not honored by the Blink planner
// (observed on Flink 1.10).
object DefineSource extends App {
import org.apache.flink.streaming.api.scala._
import org.apache.flink.table.api.scala._
// NOTE(review): TimeCharacteristic, EnvironmentSettings, Rowtime, Schema,
// Kafka and Csv are used below without visible imports — this snippet assumes
// the corresponding Flink descriptor imports are in scope.
val env = StreamExecutionEnvironment.getExecutionEnvironment
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
// Blink planner, streaming mode.
val config = EnvironmentSettings.newInstance().inStreamingMode().useBlinkPlanner().build()
val tEnv = StreamTableEnvironment.create(env, config)
// Periodic bounded watermarks with 5000 ms allowed out-of-orderness.
val rowtime = new Rowtime().watermarksPeriodicBounded(5000)
// Schema: column `k` plus `ts`, which is declared as the rowtime attribute.
val schema = new Schema()
.field("k", "string")
.field("ts", "timestamp(3)").rowtime(rowtime)
tEnv.connect(new Kafka()
.topic("test")
.version("universal"))
.withSchema(schema)
.withFormat(new Csv())
.createTemporaryTable("InputTable")
// Tumbling-window aggregation over `ts`. In Flink 1.10 this throws
// "Window aggregate can only be defined over a time attribute column"
// because `ts` is treated as a plain TIMESTAMP(3), not a time attribute.
val output = tEnv.sqlQuery(
"""SELECT k, COUNT(*)
| FROM InputTable
| GROUP BY k, TUMBLE(ts, INTERVAL '15' MINUTE)
|""".stripMargin
)
tEnv.toAppendStream[(String, Long)](output).print()
env.execute()
}
产生
org.apache.flink.table.api.TableException: Window aggregate can only be defined over a time attribute column, but TIMESTAMP(3) encountered.
at org.apache.flink.table.planner.plan.rules.logical.StreamLogicalWindowAggregateRule.getInAggregateGroupExpression(StreamLogicalWindowAggregateRule.scala:51)
at org.apache.flink.table.planner.plan.rules.logical.LogicalWindowAggregateRuleBase.onMatch(LogicalWindowAggregateRuleBase.scala:79)
at org.apache.calcite.plan.AbstractRelOptPlanner.fireRule(AbstractRelOptPlanner.java:319)
at org.apache.calcite.plan.hep.HepPlanner.applyRule(HepPlanner.java:560)
at org.apache.calcite.plan.hep.HepPlanner.applyRules(HepPlanner.java:419)
at org.apache.calcite.plan.hep.HepPlanner.executeInstruction(HepPlanner.java:256)
at org.apache.calcite.plan.hep.HepInstruction$RuleInstance.execute(HepInstruction.java:127)
at org.apache.calcite.plan.hep.HepPlanner.executeProgram(HepPlanner.java:215)
at org.apache.calcite.plan.hep.HepPlanner.findBestExp(HepPlanner.java:202)
at org.apache.flink.table.planner.plan.optimize.program.FlinkHepProgram.optimize(FlinkHepProgram.scala:69)
at org.apache.flink.table.planner.plan.optimize.program.FlinkHepRuleSetProgram.optimize(FlinkHepRuleSetProgram.scala:87)
at org.apache.flink.table.planner.plan.optimize.program.FlinkChainedProgram.$anonfun$optimize$1(FlinkChainedProgram.scala:62)
at scala.collection.TraversableOnce.$anonfun$foldLeft$1(TraversableOnce.scala:160)
at scala.collection.TraversableOnce.$anonfun$foldLeft$1$adapted(TraversableOnce.scala:160)
at scala.collection.Iterator.foreach(Iterator.scala:941)
at scala.collection.Iterator.foreach$(Iterator.scala:941)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
at scala.collection.IterableLike.foreach(IterableLike.scala:74)
at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
at scala.collection.TraversableOnce.foldLeft(TraversableOnce.scala:160)
at scala.collection.TraversableOnce.foldLeft$(TraversableOnce.scala:158)
at scala.collection.AbstractTraversable.foldLeft(Traversable.scala:108)
at org.apache.flink.table.planner.plan.optimize.program.FlinkChainedProgram.optimize(FlinkChainedProgram.scala:58)
at org.apache.flink.table.planner.plan.optimize.StreamCommonSubGraphBasedOptimizer.optimizeTree(StreamCommonSubGraphBasedOptimizer.scala:170)
at org.apache.flink.table.planner.plan.optimize.StreamCommonSubGraphBasedOptimizer.doOptimize(StreamCommonSubGraphBasedOptimizer.scala:94)
at org.apache.flink.table.planner.plan.optimize.CommonSubGraphBasedOptimizer.optimize(CommonSubGraphBasedOptimizer.scala:77)
at org.apache.flink.table.planner.delegation.PlannerBase.optimize(PlannerBase.scala:248)
at org.apache.flink.table.planner.delegation.PlannerBase.translate(PlannerBase.scala:151)
at org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl.toDataStream(StreamTableEnvironmentImpl.scala:210)
at org.apache.flink.table.api.scala.internal.StreamTableEnvironmentImpl.toAppendStream(StreamTableEnvironmentImpl.scala:107)
我使用的 Flink 版本是 1.10.0。
解决方案
不幸的是,这是 1.10 中的一个错误,正如@lijiayan 所说,应该在 1.11+ 中修复
作为 1.10 中的解决方法,您可以改用 DDL:
// Workaround for Flink 1.10: declare the table via SQL DDL instead of the
// Table API descriptor. The WATERMARK clause makes `ts` a genuine event-time
// attribute, so the tumbling-window aggregation above works.
tEnv.sqlUpdate(
"CREATE TABLE InputTable (\n" +
" k STRING,\n" +
" ts TIMESTAMP(3),\n" +
" WATERMARK FOR ts AS ts - INTERVAL '5' SECOND\n" +
") WITH (\n" +
" 'connector.type' = 'kafka',\n" +
" 'connector.version' = 'universal',\n" +
" 'connector.topic' = 'test',\n" +
" 'connector.properties.zookeeper.connect' = 'localhost:2181',\n" +
" 'connector.properties.bootstrap.servers' = 'localhost:9092',\n" +
" 'format.type' = 'csv'\n" +
")"
);
推荐阅读
- java - 我可以使用直接文件路径访问存储在应用程序特定存储中的文件吗?
- flutter - Flutter 创建从 GridView 元素到 Header 的转换
- javascript - Javascript regEx删除除特殊字符之间的所有括号
- php - Flutter - 使用 HTTP POST 和 PHP:无法在 PHP 中获取传递的数据
- python - Django:如何使用 login_required 测试视图?
- sql-server - 从 SQL Server 存储过程调用 Oracle 过程 - 尝试发送 UDTT 时出现错误“必须声明标量变量”
- python - 在 Python 中使用 selenium 进行网络抓取时,使用 Pandas 双重填充数组
- spring - 如何访问活动 Spring.config.import Url
- r - 将 RShiny 表单链接到 SQLite 数据库时出错
- java - 逗号后超过 6 位(纳秒)的整数