hadoop - Hive 在分区表上计算列统计信息失败
问题描述
我正在尝试为 Hive 中的一张分区表计算统计信息。我运行的命令如下:
hive --hiveconf hive.root.logger=DRFA --hiveconf hive.log.dir=./logs --hiveconf hive.log.level=ERROR -e "ANALYZE TABLE database.tablename PARTITION(Partition1, Partition2, Partition3, Partition4) COMPUTE STATISTICS FOR COLUMNS;"
我不断收到以下错误:
Cannot get table tablename
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.ColumnStatsTask
Hive 版本是 Hive 2.3.7
日志中的错误如下所示:
2020-11-11T17:33:06,905 ERROR [Tez session start thread] tez.TezSessionState: Failed to start Tez session
java.io.IOException: java.lang.InterruptedException: sleep interrupted
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState.startSessionAndContainers(TezSessionState.java:398) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState.access$000(TezSessionState.java:87) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState$1.call(TezSessionState.java:330) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState$1.call(TezSessionState.java:326) ~[hive-exec-2.3.7.jar:2.3.7]
at java.util.concurrent.FutureTask.run(FutureTask.java:266) ~[?:1.8.0_265]
at java.lang.Thread.run(Thread.java:748) [?:1.8.0_265]
Caused by: java.lang.InterruptedException: sleep interrupted
at java.lang.Thread.sleep(Native Method) ~[?:1.8.0_265]
at org.apache.tez.client.TezClient.waitTillReady(TezClient.java:1020) ~[tez-api-0.9.2.jar:0.9.2]
at org.apache.tez.client.TezClient.waitTillReady(TezClient.java:982) ~[tez-api-0.9.2.jar:0.9.2]
at org.apache.hadoop.hive.ql.exec.tez.TezSessionState.startSessionAndContainers(TezSessionState.java:396) ~[hive-exec-2.3.7.jar:2.3.7]
... 5 more
2020-11-11T17:40:20,117 ERROR [1870388b-f548-4ede-a12b-191e21f34b3e main] metadata.Hive: Table tablename not found: default.tablename table not found
2020-11-11T17:40:20,129 ERROR [1870388b-f548-4ede-a12b-191e21f34b3e main] exec.StatsTask: Cannot get table tablename
org.apache.hadoop.hive.ql.metadata.InvalidTableException: Table not found tablename
at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1279) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1230) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.metadata.Hive.getTable(Hive.java:1217) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.StatsTask.execute(StatsTask.java:128) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2184) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:336) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:787) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686) ~[hive-cli-2.3.7.jar:2.3.7]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_265]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_265]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_265]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_265]
at org.apache.hadoop.util.RunJar.run(RunJar.java:244) ~[hadoop-common-2.10.0.jar:?]
at org.apache.hadoop.util.RunJar.main(RunJar.java:158) ~[hadoop-common-2.10.0.jar:?]
2020-11-11T17:40:20,129 ERROR [1870388b-f548-4ede-a12b-191e21f34b3e main] exec.Task: Cannot get table tablename.apache.hadoop.hive.ql.metadata.InvalidTableException: Table not found tablename
2020-11-11T17:40:27,436 ERROR [1870388b-f548-4ede-a12b-191e21f34b3e main] exec.ColumnStatsTask: Failed to run column stats task
org.apache.hadoop.hive.ql.metadata.HiveException: org.apache.thrift.transport.TTransportException
at org.apache.hadoop.hive.ql.metadata.Hive.setPartitionColumnStatistics(Hive.java:3781) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.ColumnStatsTask.persistColumnStats(ColumnStatsTask.java:420) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.ColumnStatsTask.execute(ColumnStatsTask.java:431) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2184) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:233) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:184) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:403) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:336) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.executeDriver(CliDriver.java:787) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.run(CliDriver.java:759) ~[hive-cli-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.cli.CliDriver.main(CliDriver.java:686) ~[hive-cli-2.3.7.jar:2.3.7]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_265]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_265]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_265]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_265]
at org.apache.hadoop.util.RunJar.run(RunJar.java:244) ~[hadoop-common-2.10.0.jar:?]
at org.apache.hadoop.util.RunJar.main(RunJar.java:158) ~[hadoop-common-2.10.0.jar:?]
Caused by: org.apache.thrift.transport.TTransportException
at org.apache.thrift.transport.TIOStreamTransport.read(TIOStreamTransport.java:132) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.thrift.transport.TTransport.readAll(TTransport.java:86) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.thrift.protocol.TBinaryProtocol.readAll(TBinaryProtocol.java:429) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.thrift.protocol.TBinaryProtocol.readI32(TBinaryProtocol.java:318) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.thrift.protocol.TBinaryProtocol.readMessageBegin(TBinaryProtocol.java:219) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.thrift.TServiceClient.receiveBase(TServiceClient.java:77) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.recv_set_aggr_stats_for(ThriftHiveMetastore.java:3592) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore$Client.set_aggr_stats_for(ThriftHiveMetastore.java:3579) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.setPartitionColumnStatistics(HiveMetaStoreClient.java:1678) ~[hive-exec-2.3.7.jar:2.3.7]
at org.apache.hadoop.hive.ql.metadata.SessionHiveMetaStoreClient.setPartitionColumnStatistics(SessionHiveMetaStoreClient.java:355) ~[hive-exec-2.3.7.jar:2.3.7]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_265]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_265]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_265]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_265]
at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:173) ~[hive-exec-2.3.7.jar:2.3.7]
at com.sun.proxy.$Proxy39.setPartitionColumnStatistics(Unknown Source) ~[?:?]
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) ~[?:1.8.0_265]
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) ~[?:1.8.0_265]
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) ~[?:1.8.0_265]
at java.lang.reflect.Method.invoke(Method.java:498) ~[?:1.8.0_265]
at org.apache.hadoop.hive.metastore.HiveMetaStoreClient$SynchronizedHandler.invoke(HiveMetaStoreClient.java:2336) ~[hive-exec-2.3.7.jar:2.3.7]
at com.sun.proxy.$Proxy39.setPartitionColumnStatistics(Unknown Source) ~[?:?]
at org.apache.hadoop.hive.ql.metadata.Hive.setPartitionColumnStatistics(Hive.java:3778) ~[hive-exec-2.3.7.jar:2.3.7]
... 22 more
我不明白:不带“FOR COLUMNS”时我可以正常计算统计信息,为什么加上之后它却报错说找不到表。
任何帮助都将不胜感激,这个问题已经让我焦头烂额了。
谢谢
解决方案
推荐阅读
- oauth-2.0 - 如何使用 Gmail REST API 发送电子邮件
- python - 尝试用所有列表替换我的文本
- reactjs - 使用 React Router 时传递状态的最简单方法是什么?
- python - 在具有 GPU 支持的 Tensorflow 中创建自定义 LSTMCell
- java - 试图限制基于图块的游戏中的移动
- mysql - 如何将索引添加到 mysql 数据库以提高性能?
- mysql - 如何从 MySQL 表中的列中删除以某些特定单词开头和结尾的部分文本/段落?
- assembly - 你如何在 SSE2 上进行带符号的 32 位扩展乘法?
- python - 根据 1-2 个公共键值获取 4 个 JSON 文件的交集?(Python)
- python - 用列名填充 NaN 的 Python Dataframe 问题