hive - 如何重写嵌套子查询以便 hive 可以运行它们
问题描述
select
cd_gender,
cd_marital_status,
cd_education_status,
count(*) cnt1,
cd_purchase_estimate,
count(*) cnt2,
cd_credit_rating,
count(*) cnt3,
cd_dep_count,
count(*) cnt4,
cd_dep_employed_count,
count(*) cnt5,
cd_dep_college_count,
count(*) cnt6
from
customer c,customer_address ca,customer_demographics
where
c.c_current_addr_sk = ca.ca_address_sk and
ca_county in ('Greer County','Boone County','Cumberland County','Tyler County','Marion County') and
cd_demo_sk = c.c_current_cdemo_sk and
exists (select *
from store_sales,date_dim
where c.c_customer_sk = ss_customer_sk and
ss_sold_date_sk = d_date_sk and
d_year = 1999 and
d_moy between 1 and 1+3) and
(exists (select *
from web_sales,date_dim
where c.c_customer_sk = ws_bill_customer_sk and
ws_sold_date_sk = d_date_sk and
d_year = 1999 and
d_moy between 1 ANd 1+3) or
exists (select *
from catalog_sales,date_dim
where c.c_customer_sk = cs_ship_customer_sk and
cs_sold_date_sk = d_date_sk and
d_year = 1999 and
d_moy between 1 and 1+3))
group by cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
order by cd_gender,
cd_marital_status,
cd_education_status,
cd_purchase_estimate,
cd_credit_rating,
cd_dep_count,
cd_dep_employed_count,
cd_dep_college_count
limit 100;
当我在 hive 上运行此查询时,它返回此错误
“失败:SemanticException [错误 10249]:org.apache.hadoop.hive.ql.optimizer.calcite.CalciteSubquerySemanticException:第 23:2 行不受支持的子查询表达式'3':只允许顶级连接的子查询表达式”
由于包含嵌套子查询的第二个存在语句而发生此错误。关于如何重写此查询以便它可以在 hive 上工作的任何想法?
解决方案
您可以尝试重新排序查询以防止出现子查询。条件是在 SQL 中连续评估的,因此运算符优先级应该不是问题。
where
exists (select *
from web_sales,date_dim
where c.c_customer_sk = ws_bill_customer_sk and
ws_sold_date_sk = d_date_sk and
d_year = 1999 and
d_moy between 1 ANd 1+3) or
exists (select *
from catalog_sales,date_dim
where c.c_customer_sk = cs_ship_customer_sk and
cs_sold_date_sk = d_date_sk and
d_year = 1999 and
d_moy between 1 and 1+3) and
c.c_current_addr_sk = ca.ca_address_sk and
ca_county in ('Greer County','Boone County','Cumberland County','Tyler County','Marion County') and
cd_demo_sk = c.c_current_cdemo_sk and
exists (select *
from store_sales,date_dim
where c.c_customer_sk = ss_customer_sk and
ss_sold_date_sk = d_date_sk and
d_year = 1999 and
d_moy between 1 and 1+3)