sql - Athena SQL中Case语句中的百分位数
问题描述
尝试在 Athena SQL 中执行此操作。我有 '75' 作为默认 tax_type,我需要用 '76' 替换 20% 的 tax_amt
电流输出原样
geocode tax_type tax_amt
32 75 10
32 75 15
32 75 20
32 75 18
32 75 20
32 75 17
我需要输出为
geocode tax_type tax_amt
32 75 10
32 75 15
32 75 20
32 75 18
32 76 20
32 75 17
我不断收到以下错误:
"GROUP BY 子句不能包含聚合或窗口函数:["count"("tax_amt"), "count"( ), "count"("tax_amt"), "count"( )]"
如果我从group by
子句中删除,那么它会说
“必须是聚合表达式或出现在 GROUP BY 子句中”
任何帮助将非常感激
SQL
WITH a AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_fact"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_fact")
)
, b AS
(SELECT g.geocode_id
, g.geocode
, coalesce(sq_actual_names.actual_name, g.jurisdiction_name) AS JUR_NAME
FROM "dl_wireless_boost"."ngpp_geocode_dim" g
LEFT JOIN
(SELECT REPLACE(geocode,'-','') AS stripped_geocode
, MAX(jurisdiction_name) AS actual_name
FROM "dl_wireless_boost"."ngpp_geocode_dim"
WHERE 1=1 AND dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
AND jurisdiction_name <> 'U'
GROUP BY 1) sq_actual_names
ON g.geocode = sq_actual_names.stripped_geocode
WHERE 1=1
AND g.dt = (SELECT MAX(dt) FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
)
, d AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_code_dim"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_code_dim")
)
, agg AS
(SELECT
CASE
WHEN d.tax_auth = '1' THEN SUBSTRING(b.geocode,1,2) || '0000000'
WHEN d.tax_auth = '2' THEN SUBSTRING(b.geocode,1,5) || '0000'
ELSE b.geocode
END AS GEOCODE
, b.JUR_NAME AS JURIS_NAME
, CAST(COUNT(a.tax_amt)/COUNT(*)*100 AS DECIMAL(30,2)) AS PERCENTILE
, d.tax_type
, CAST (d.tax_rate AS DECIMAL (30,5)) AS TAX_RATE
, SUM(CAST(a.tax_amt AS DECIMAL(30,2))) AS TAX
, SUBSTRING(b.geocode,1,2) AS STATE_GEO
, 'PPD Boost Usage' AS ID
, d.tax_auth AS TAX_AUTH
, f.auth_name AS AUTH_NAME
, CASE
WHEN SUBSTRING(b.geocode,1,2) = '72'
THEN '417'
ELSE '317'
END AS BU
, c.tax_descrp AS TAX_DESCRP
, CASE
WHEN b.geocode LIKE '%-%-%'
THEN b.geocode
ELSE SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4)
END AS GEOMASK
, 'NO' AS COST_RECOVERY_IDENTIFIER
, CONCAT(SUBSTRING(b.geocode,1,2),d.tax_auth,d.tax_type) AS GL_CROSS_REF
, SUBSTRING(load_id,1,6) AS YYYYMM
, c.gl_account AS GL_ACCOUNT
, d.tax_srv_type
FROM a
LEFT JOIN b
ON a.geocode_id = b.geocode_id
LEFT JOIN d
ON a.tax_code_id = d.tax_code_id
LEFT JOIN "sbx_accntnganalytics"."tax_auth_name" f
ON d.tax_auth = f.tax_auth
LEFT JOIN "sbx_accntnganalytics"."tax_gl_desc" c
ON d.tax_type = c.tax_type
AND SUBSTRING(b.geocode,1,2) = c.state_geo
GROUP BY 1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18
)
SELECT GEOCODE
, JURIS_NAME
, PERCENTILE
, CASE
when STATE_GEO = '32' AND tax_type = '75' AND CAST(COUNT(tax)/COUNT(*)*100 AS DECIMAL(30,2)) = 0.20
THEN REPLACE (tax_type,'75','76') else tax_type end as tax_type
, TAX_RATE
, TAX
, STATE_GEO
, TAX_AUTH
, AUTH_NAME
, BU
, TAX_DESCRP
, GEOMASK
, COST_RECOVERY_IDENTIFIER
, GL_CROSS_REF
, YYYYMM
, GL_ACCOUNT
, tax_srv_type
FROM agg
解决方案
因为您在同一表达式中将分组列与聚合组合在一起,所以您会遇到GROUP BY
错误。考虑先聚合,然后运行您的CASE
表达式,这可以通过公用表表达式 (CTE) usingWITH
子句来促进。你甚至可以CASE
用多个WHEN
逻辑表达式来展平你的。
WITH a AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_fact"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_fact")
)
, b AS
(SELECT g.geocode_id
, g.geocode
, coalesce(sq_actual_names.actual_name, g.jurisdiction_name) AS JUR_NAME
FROM "dl_wireless_boost"."ngpp_geocode_dim" g `enter code here`
LEFT JOIN
(SELECT REPLACE(geocode,'-','') AS stripped_geocode
, MAX(jurisdiction_name) AS actual_name
FROM "dl_wireless_boost"."ngpp_geocode_dim"
WHERE 1=1 AND dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
AND jurisdiction_name <> 'U'
GROUP BY 1) sq_actual_names
ON g.geocode = sq_actual_names.stripped_geocode
WHERE 1=1
AND g.dt = (SELECT MAX(dt) FROM "dl_wireless_boost"."ngpp_geocode_dim")
AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4))
)
, d AS
(SELECT *
FROM "dl_wireless_boost"."ngpp_tax_code_dim"
WHERE dt = (SELECT MAX(dt)
FROM "dl_wireless_boost"."ngpp_tax_code_dim")
)
, agg AS
(SELECT
CASE
WHEN d.tax_auth = '1' THEN SUBSTRING(b.geocode,1,2) || '0000000'
WHEN d.tax_auth = '2' THEN SUBSTRING(b.geocode,1,5) || '0000'
ELSE b.geocode
END AS GEOCODE
, b.JUR_NAME AS JURIS_NAME
, CAST(COUNT(a.tax_amt)/COUNT(*)*100 AS DECIMAL(30,2)) AS PERCENTILE,
, d.tax_type
, CAST (d.tax_rate AS DECIMAL (30,5)) AS TAX_RATE
, SUM(CAST(a.tax_amt AS DECIMAL(30,2))) AS TAX
, SUBSTRING(b.geocode,1,2) AS STATE_GEO
, 'PPD Boost Usage' AS ID
, d.tax_auth AS TAX_AUTH
, f.auth_name AS AUTH_NAME
, CASE
WHEN SUBSTRING(b.geocode,1,2) = '72'
THEN '417'
ELSE '317'
END AS BU
, c.tax_descrp AS TAX_DESCRP
, CASE
WHEN b.geocode LIKE '%-%-%'
THEN b.geocode
ELSE SUBSTRING(geocode,1,2) || '-' ||
SUBSTRING(geocode,3,3) || '-' ||
SUBSTRING(geocode,6,4)
END AS GEOMASK
, 'NO' AS COST_RECOVERY_IDENTIFIER
, CONCAT(SUBSTRING(b.geocode,1,2),d.tax_auth,d.tax_type) AS GL_CROSS_REF
, SUBSTRING(load_id,1,6) AS YYYYMM
, c.gl_account AS GL_ACCOUNT
, d.tax_srv_type
FROM a
LEFT JOIN b
ON a.geocode_id = b.geocode_id
LEFT JOIN d
ON a.tax_code_id = d.tax_code_id
LEFT JOIN "sbx_accntnganalytics"."tax_auth_name" f
ON d.tax_auth = f.tax_auth
LEFT JOIN "sbx_accntnganalytics"."tax_gl_desc" c
ON d.tax_type = c.tax_type
AND SUBSTRING(b.geocode,1,2) = c.state_geo
GROUP BY 1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18
)
SELECT GEOCODE
, JURIS_NAME
, PERCENTILE
, CASE
WHEN STATE_GEO = '32' AND d.tax_type = '75' AND PERCENTILE = 0.20
THEN REPLACE (d.tax_type,'75','76')
WHEN (STATE_GEO <> '32' OR d.tax_type <> '75') AND PERCENTILE = 0.80
THEN d.tax_type
ELSE NULL
END AS TAX_TYPE
, TAX_RATE
, TAX
, STATE_GEO
, ID
, TAX_AUTH
, AUTH_NAME
, BU
, TAX_DESCRP
, GEOMASK
, COST_RECOVERY_IDENTIFIER
, GL_CROSS_REF
, YYYYMM
, GL_ACCOUNT
, tax_srv_type
FROM agg
除了 - 一定要注意要踢的坏习惯:使用表别名,如 (a, b, c) 或 (t1, t2, t3)和更多信息别名。
推荐阅读
- react-admin - 你可以在配置中禁用 React-Admin Undo 功能吗?
- python - 如何为数据框中的每个单独类别绘制一周中的每一天
- c - 运行时不可分割的循环大小对 openMP SIMD 的影响
- php - php函数(将html添加到主题)不起作用
- php - Laravel 工作检查网站(如果在线)
- inno-setup - Inno Setup:共享文件未从我的安装目录中删除
- batch-file - 从 Batch Windows 7 行中提取字母数字字符串
- angular - 为什么将社交登录添加到 Angular 6 npm start 后失败
- gnuradio - 是否可以配置 gr-ieee-80211 wifi_rx 文件以接收更高级别的层?
- python - 使用 URL 将模型设置为 true 或 false