hive - HIVE分区查询
问题描述
将分区从 tb_ext_txnrecords 移动到 tb_ext_static_txnrecords。
在 tb_ext_txnrecords 表中选择该分区会给出 0 条记录,但 count(*) 会在 HIVE 中给出一个值。
在 HDFS 中,文件从 tb_ext_txnrecords 移动到 tb_ext_static_txnrecords。
Hadoop版本:Hadoop 2.7.1 Hive版本:Hive 2.3.9
** 代码片段
-- Dynamic external
create external table tb_ext_txnrecords (txnno INT, custno INT,
amount DOUBLE, category varchar(100), product varchar(100), city varchar(100),
spendby varchar(100), txndate date)
partitioned by (state varchar(100))
clustered by (category) sorted by (city) into 3 buckets
row format delimited fields terminated by ','
stored as orc;
set hive.exec.dynamic.partition.mode=nonstrict;
Insert into table tb_ext_txnrecords partition (state)
select txnno,custno,amount, category, product,city,spendby,from_unixtime(unix_timestamp(txndate,'MM-dd-yyyy'),'yyyy-MM-dd'),state
from retail_db.txnrecords;
-- static external
create external table tb_ext_static_txnrecords (txnno INT, custno INT,
amount DOUBLE, category varchar(100), product varchar(100), city varchar(100),
spendby varchar(100), txndate date)
partitioned by (state varchar(100))
clustered by (category) sorted by (city) into 3 buckets
row format delimited fields terminated by ','
stored as orc;
set hive.exec.dynamic.partition.mode=strict;
set hive.strict.checks.bucketing=false; -- to load into a bucketted table
-- Moving Alabama partition of tb_ext_txnrecords into a new static partition table tb_ext_static_txnrecords
load data inpath '/user/hduser/hive_handson_use_cases/tb_ext_txnrecords/state=Alabama'
overwrite into table tb_ext_static_txnrecords partition(state='Alabama');
0: jdbc:hive2://localhost:10000/default> msck repair table tb_ext_txnrecords;
No rows affected (1.239 seconds)
0: jdbc:hive2://localhost:10000/default> select count(*) from tb_ext_static_txnrecords where state='Alabama';
WARNING: Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
+-------+
| _c0 |
+-------+
| 2160 |
+-------+
1 row selected (45.4 seconds)
0: jdbc:hive2://localhost:10000/default> select count(*) from tb_ext_txnrecords where state='Alabama';
+-------+
| _c0 |
+-------+
| 2160 |
+-------+
1 row selected (0.275 seconds)
0: jdbc:hive2://localhost:10000/default> select * from tb_ext_txnrecords where state='Alabama';
+--------------------------+---------------------------+---------------------------+-----------------------------+----------------------------+-------------------------+----------------------------+----------------------------+--------------------------+
| tb_ext_txnrecords.txnno | tb_ext_txnrecords.custno | tb_ext_txnrecords.amount | tb_ext_txnrecords.category | tb_ext_txnrecords.product | tb_ext_txnrecords.city | tb_ext_txnrecords.spendby | tb_ext_txnrecords.txndate | tb_ext_txnrecords.state |
+--------------------------+---------------------------+---------------------------+-----------------------------+----------------------------+-------------------------+----------------------------+----------------------------+--------------------------+
+--------------------------+---------------------------+---------------------------+-----------------------------+----------------------------+-------------------------+----------------------------+----------------------------+--------------------------+
No rows selected (0.222 seconds)
解决方案
推荐阅读
- jquery - 带有滑块和百分比显示的 Jquery 缩放
- neo4j - 查找接口方法的实现
- javascript - 获取未定义的值而不是返回的字符串?
- excel - 如何在“对于每个循环”中进行偏移
- git - 可能有一个分支是 GitHub 上的 master 上的补丁?
- java - “如何解决“使用毕加索库无法将图像上传到 firebase-storage 的问题”
- python - FTP文件传输期间的Python数据通道超时
- javascript - Nest JS GraphQL“不能为非空返回空”
- c++ - 无法从多态嵌套类访问包含类的成员
- node.js - Mac 错误:/bin/sh: npm: command not found