需求 : 计算每个用户最大的连续登录天数,可以间隔一天。解释:如果一个用户在 1,3,5,6 登录游戏,则视为连续 6 天登录
-- 数据准备
-- DDL create table test2 ( `user_id` string comment '用户id', `login_date` string comment '登入日期', `login_status` string comment '登入状态(1:成功,2:异常)') comment '用户登入记录表' row format delimited fields terminated by '\t' lines terminated by '\n' stored as orc; -- DML insert overwrite table test2 select '1001' as id,'2021-12-12' as occur_date,'1' as lowcarbon union all select '1001' as id,'2021-12-20' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-02-10' as occur_date,'1' as lowcarbon union all select '1001' as id,'2021-12-20' as occur_date,'0' as lowcarbon union all select '1002' as id,'2021-12-12' as occur_date,'1' as lowcarbon union all select '1001' as id,'2021-12-13' as occur_date,'1' as lowcarbon union all select '1001' as id,'2021-12-13' as occur_date,'1' as lowcarbon union all select '1001' as id,'2021-12-13' as occur_date,'1' as lowcarbon union all select '1002' as id,'2021-12-14' as occur_date,'0' as lowcarbon union all select '1001' as id,'2021-12-14' as occur_date,'1' as lowcarbon union all select '1002' as id,'2021-12-15' as occur_date,'1' as lowcarbon union all select '1001' as id,'2021-12-15' as occur_date,'1' as lowcarbon union all select '1003' as id,'2021-12-15' as occur_date,'0' as lowcarbon union all select '1003' as id,'2021-12-16' as occur_date,'1' as lowcarbon union all select '1003' as id,'2021-12-17' as occur_date,'1' as lowcarbon union all select '1003' as id,'2021-12-18' as occur_date,'1' as lowcarbon union all select '1003' as id,'2021-12-29' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-01-01' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-01-01' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-01-01' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-01-03' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-01-05' as occur_date,'1' as lowcarbon union all select '1001' as id,'2022-01-06' as occur_date,'1' as lowcarbon union all select '1003' as id,'2021-12-19' as occur_date,'1' as lowcarbon ; -- 数据说明 -- 某游戏公司记录的用户每日登录数据 -- 每天每天可能有多次登入记录
-- 分析思路
2 4 6 7 8 10 11 13 14 怎样判断序列连续? 连续规则为间隔为1或者2 示例: 1 3 5 6 视为连续 获取前置元素差值法 1. 获取当前元素的前一位元素 2 null 4 2 6 4 7 6 8 7 10 8 11 10 13 11 14 13 2. 当前元素 与前置元素 做差 2 null null 4 2 2 6 4 2 7 6 1 8 7 1 10 8 2 11 10 1 13 11 2 14 13 1 3. 差值为 1、2的 视为连续(对中或队尾)标记为0, 否为视为队首,标记为1 2 null null 1 4 2 2 0 6 4 2 0 7 6 1 0 8 7 1 0 10 8 2 0 11 10 1 0 13 11 2 0 14 13 1 0
-- 执行sql
with t1 as ( select user_id , login_date , if(datediff(login_date, lag(login_date) over (partition by user_id order by login_date asc)) in (0, 1, 2), 0, 1) as if_contin from test2 where login_status = 1 ), t2 as ( select user_id ,login_date ,sum(if_contin) over(partition by user_id order by login_date asc) as contin_group from t1), t3 as ( select user_id ,contin_group ,datediff(max(login_date),min(login_date)) + 1 as contin_days from t2 group by user_id ,contin_group) select user_id ,max(contin_days) from t3 group by user_id ; user_id _c1 1001 6 1002 1 1003 4