首页 > 解决方案 > Athena SQL中Case语句中的百分位数

问题描述

尝试在 Athena SQL 中执行此操作。我有 '75' 作为默认 tax_type,我需要用 '76' 替换 20% 的 tax_amt

电流输出原样

geocode  tax_type  tax_amt
32         75       10
32         75       15
32         75       20
32         75       18
32         75       20
32         75       17

我需要输出为

geocode  tax_type  tax_amt
32         75       10
32         75       15
32         75       20
32         75       18
32         76       20
32         75       17

我不断收到以下错误:

"GROUP BY 子句不能包含聚合或窗口函数:["count"("tax_amt"), "count"( ), "count"("tax_amt"), "count"( )]"

如果我从group by子句中删除,那么它会说

“必须是聚合表达式或出现在 GROUP BY 子句中”

任何帮助将非常感激

SQL

   WITH a AS
   (SELECT * 
    FROM "dl_wireless_boost"."ngpp_tax_fact" 
    WHERE dt = (SELECT MAX(dt) 
                FROM "dl_wireless_boost"."ngpp_tax_fact")
   )
, b AS 
    (SELECT g.geocode_id
         , g.geocode
         , coalesce(sq_actual_names.actual_name, g.jurisdiction_name) AS JUR_NAME
    FROM "dl_wireless_boost"."ngpp_geocode_dim" g 
    LEFT JOIN 
       (SELECT REPLACE(geocode,'-','') AS stripped_geocode
               , MAX(jurisdiction_name) AS actual_name
        FROM "dl_wireless_boost"."ngpp_geocode_dim" 
        WHERE 1=1 AND dt = (SELECT MAX(dt) 
                            FROM "dl_wireless_boost"."ngpp_geocode_dim")
        AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' || 
                                            SUBSTRING(geocode,3,3) || '-' || 
                                            SUBSTRING(geocode,6,4))
        AND jurisdiction_name <> 'U'
        GROUP BY 1) sq_actual_names
     ON g.geocode = sq_actual_names.stripped_geocode
     WHERE 1=1 
       AND g.dt = (SELECT MAX(dt) FROM "dl_wireless_boost"."ngpp_geocode_dim")
       AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' || 
                                           SUBSTRING(geocode,3,3) || '-' || 
                                           SUBSTRING(geocode,6,4))
   )
, d AS 
    (SELECT * 
     FROM "dl_wireless_boost"."ngpp_tax_code_dim" 
     WHERE dt = (SELECT MAX(dt) 
                 FROM "dl_wireless_boost"."ngpp_tax_code_dim")
    )   
, agg AS
    (SELECT 
          CASE 
               WHEN d.tax_auth = '1' THEN SUBSTRING(b.geocode,1,2) || '0000000' 
               WHEN d.tax_auth = '2' THEN SUBSTRING(b.geocode,1,5) || '0000' 
               ELSE b.geocode 
          END AS GEOCODE
         , b.JUR_NAME AS JURIS_NAME
         , CAST(COUNT(a.tax_amt)/COUNT(*)*100 AS DECIMAL(30,2)) AS PERCENTILE
         , d.tax_type 
         , CAST (d.tax_rate AS DECIMAL (30,5)) AS TAX_RATE
         , SUM(CAST(a.tax_amt AS DECIMAL(30,2))) AS TAX
         , SUBSTRING(b.geocode,1,2) AS STATE_GEO
         , 'PPD Boost Usage' AS ID
         , d.tax_auth AS TAX_AUTH
         , f.auth_name AS AUTH_NAME
         , CASE 
              WHEN SUBSTRING(b.geocode,1,2) = '72'
              THEN '417' 
              ELSE '317' 
          END AS BU
        , c.tax_descrp AS TAX_DESCRP
        , CASE 
              WHEN b.geocode LIKE '%-%-%' 
              THEN b.geocode 
              ELSE SUBSTRING(geocode,1,2) || '-' || 
                   SUBSTRING(geocode,3,3) || '-' || 
                   SUBSTRING(geocode,6,4) 
          END AS GEOMASK 
        , 'NO' AS COST_RECOVERY_IDENTIFIER
        , CONCAT(SUBSTRING(b.geocode,1,2),d.tax_auth,d.tax_type) AS GL_CROSS_REF
        , SUBSTRING(load_id,1,6) AS YYYYMM
        , c.gl_account AS GL_ACCOUNT
        , d.tax_srv_type

    FROM a
    LEFT JOIN b
      ON a.geocode_id = b.geocode_id

    LEFT JOIN  d    
      ON a.tax_code_id = d.tax_code_id

    LEFT JOIN "sbx_accntnganalytics"."tax_auth_name" f
       ON d.tax_auth = f.tax_auth

    LEFT JOIN "sbx_accntnganalytics"."tax_gl_desc" c
       ON d.tax_type = c.tax_type 
       AND SUBSTRING(b.geocode,1,2) = c.state_geo

    GROUP BY 1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18
)
SELECT GEOCODE
       , JURIS_NAME
       , PERCENTILE
       , CASE 
                when STATE_GEO = '32' AND tax_type = '75' AND CAST(COUNT(tax)/COUNT(*)*100 AS DECIMAL(30,2)) = 0.20 
                THEN REPLACE (tax_type,'75','76') else tax_type end as tax_type
       , TAX_RATE
       , TAX
       , STATE_GEO
       , TAX_AUTH
       , AUTH_NAME
       , BU
       , TAX_DESCRP
       , GEOMASK 
       , COST_RECOVERY_IDENTIFIER
       , GL_CROSS_REF
       , YYYYMM
       , GL_ACCOUNT
       , tax_srv_type
FROM agg
 

标签: sqlcasewindow-functionsamazon-athena

解决方案


因为您在同一表达式中将分组列与聚合组合在一起,所以您会遇到GROUP BY错误。考虑先聚合,然后运行您的CASE表达式,这可以通过公用表表达式 (CTE) usingWITH子句来促进。你甚至可以CASE用多个WHEN逻辑表达式来展平你的。

WITH a AS
   (SELECT * 
    FROM "dl_wireless_boost"."ngpp_tax_fact" 
    WHERE dt = (SELECT MAX(dt) 
                FROM "dl_wireless_boost"."ngpp_tax_fact")
   )
, b AS 
    (SELECT g.geocode_id
         , g.geocode
         , coalesce(sq_actual_names.actual_name, g.jurisdiction_name) AS JUR_NAME
    FROM "dl_wireless_boost"."ngpp_geocode_dim" g `enter code here`
    LEFT JOIN 
       (SELECT REPLACE(geocode,'-','') AS stripped_geocode
               , MAX(jurisdiction_name) AS actual_name
        FROM "dl_wireless_boost"."ngpp_geocode_dim" 
        WHERE 1=1 AND dt = (SELECT MAX(dt) 
                            FROM "dl_wireless_boost"."ngpp_geocode_dim")
        AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' || 
                                            SUBSTRING(geocode,3,3) || '-' || 
                                            SUBSTRING(geocode,6,4))
        AND jurisdiction_name <> 'U'
        GROUP BY 1) sq_actual_names
     ON g.geocode = sq_actual_names.stripped_geocode
     WHERE 1=1 
       AND g.dt = (SELECT MAX(dt) FROM "dl_wireless_boost"."ngpp_geocode_dim")
       AND (geocode = geocode OR geocode = SUBSTRING(geocode,1,2) || '-' || 
                                           SUBSTRING(geocode,3,3) || '-' || 
                                           SUBSTRING(geocode,6,4))
   )
, d AS 
    (SELECT * 
     FROM "dl_wireless_boost"."ngpp_tax_code_dim" 
     WHERE dt = (SELECT MAX(dt) 
                 FROM "dl_wireless_boost"."ngpp_tax_code_dim")
    )   
, agg AS
    (SELECT 
          CASE 
               WHEN d.tax_auth = '1' THEN SUBSTRING(b.geocode,1,2) || '0000000' 
               WHEN d.tax_auth = '2' THEN SUBSTRING(b.geocode,1,5) || '0000' 
               ELSE b.geocode 
          END AS GEOCODE
         , b.JUR_NAME AS JURIS_NAME
         , CAST(COUNT(a.tax_amt)/COUNT(*)*100 AS DECIMAL(30,2)) AS PERCENTILE,
         , d.tax_type
         , CAST (d.tax_rate AS DECIMAL (30,5)) AS TAX_RATE
         , SUM(CAST(a.tax_amt AS DECIMAL(30,2))) AS TAX
         , SUBSTRING(b.geocode,1,2) AS STATE_GEO
         , 'PPD Boost Usage' AS ID
         , d.tax_auth AS TAX_AUTH
         , f.auth_name AS AUTH_NAME
         , CASE 
              WHEN SUBSTRING(b.geocode,1,2) = '72'
              THEN '417' 
              ELSE '317' 
          END AS BU
        , c.tax_descrp AS TAX_DESCRP
        , CASE 
              WHEN b.geocode LIKE '%-%-%' 
              THEN b.geocode 
              ELSE SUBSTRING(geocode,1,2) || '-' || 
                   SUBSTRING(geocode,3,3) || '-' || 
                   SUBSTRING(geocode,6,4) 
          END AS GEOMASK 
        , 'NO' AS COST_RECOVERY_IDENTIFIER
        , CONCAT(SUBSTRING(b.geocode,1,2),d.tax_auth,d.tax_type) AS GL_CROSS_REF
        , SUBSTRING(load_id,1,6) AS YYYYMM
        , c.gl_account AS GL_ACCOUNT
        , d.tax_srv_type

    FROM a
    LEFT JOIN b
      ON a.geocode_id = b.geocode_id

    LEFT JOIN  d    
      ON a.tax_code_id = d.tax_code_id

    LEFT JOIN "sbx_accntnganalytics"."tax_auth_name" f
       ON d.tax_auth = f.tax_auth

    LEFT JOIN "sbx_accntnganalytics"."tax_gl_desc" c
       ON d.tax_type = c.tax_type 
       AND SUBSTRING(b.geocode,1,2) = c.state_geo

    GROUP BY 1,2,4,5,7,8,9,10,11,12,13,14,15,16,17,18
)

SELECT GEOCODE
       , JURIS_NAME
       , PERCENTILE
       , CASE 
                WHEN STATE_GEO = '32' AND d.tax_type = '75' AND PERCENTILE = 0.20 
                THEN REPLACE (d.tax_type,'75','76') 
                WHEN (STATE_GEO <> '32' OR d.tax_type <> '75') AND PERCENTILE = 0.80 
                THEN d.tax_type 
                ELSE NULL
         END AS TAX_TYPE
       , TAX_RATE
       , TAX
       , STATE_GEO
       , ID
       , TAX_AUTH
       , AUTH_NAME
       , BU
       , TAX_DESCRP
       , GEOMASK 
       , COST_RECOVERY_IDENTIFIER
       , GL_CROSS_REF
       , YYYYMM
       , GL_ACCOUNT
       , tax_srv_type
FROM agg

除了 - 一定要注意要踢的坏习惯:使用表别名,如 (a, b, c) 或 (t1, t2, t3)和更多信息别名。


推荐阅读