首页 > 解决方案 > MySQL:从每个组中选择 N% 的随机样本并更新一个字段

问题描述

我有一个名为 tbltaskrecord 的 MySQL 表。我需要针对每个 ReviewDate 下的每个 UserId 随机抽取 10% 的样本,并将这些行的 AuditStatus 字段更新为 'Check'。下面是建表脚本和一些示例数据:

-- One row per reviewed task. TaskId identifies the row; AuditStatus is the
-- only nullable column and is the field the sampling job is meant to set.
CREATE TABLE tbltaskrecord (
    ReviewDate  DATE        NOT NULL,  -- day the task was reviewed
    UserId      VARCHAR(50) NOT NULL,  -- user the task belongs to
    TaskId      VARCHAR(50) NOT NULL,  -- unique task identifier (primary key)
    AuditStatus VARCHAR(50),           -- NULL until the row is picked for audit
    PRIMARY KEY (TaskId)
);

-- Sample data: 20 unaudited tasks for user jdoe1 reviewed on 2018-09-19.
INSERT INTO tbltaskrecord (ReviewDate, UserId, TaskId, AuditStatus)
VALUES
    ('2018-09-19', 'jdoe1', 'R110000001', NULL),
    ('2018-09-19', 'jdoe1', 'R110000002', NULL),
    ('2018-09-19', 'jdoe1', 'R110000003', NULL),
    ('2018-09-19', 'jdoe1', 'R110000004', NULL),
    ('2018-09-19', 'jdoe1', 'R110000005', NULL),
    ('2018-09-19', 'jdoe1', 'R110000006', NULL),
    ('2018-09-19', 'jdoe1', 'R110000007', NULL),
    ('2018-09-19', 'jdoe1', 'R110000008', NULL),
    ('2018-09-19', 'jdoe1', 'R110000009', NULL),
    ('2018-09-19', 'jdoe1', 'R110000010', NULL),
    ('2018-09-19', 'jdoe1', 'R110000011', NULL),
    ('2018-09-19', 'jdoe1', 'R110000012', NULL),
    ('2018-09-19', 'jdoe1', 'R110000013', NULL),
    ('2018-09-19', 'jdoe1', 'R110000014', NULL),
    ('2018-09-19', 'jdoe1', 'R110000015', NULL),
    ('2018-09-19', 'jdoe1', 'R110000016', NULL),
    ('2018-09-19', 'jdoe1', 'R110000017', NULL),
    ('2018-09-19', 'jdoe1', 'R110000018', NULL),
    ('2018-09-19', 'jdoe1', 'R110000019', NULL),
    ('2018-09-19', 'jdoe1', 'R110000020', NULL);
-- Sample data: 20 unaudited tasks for user jdoe2 reviewed on 2018-09-19.
INSERT INTO tbltaskrecord (ReviewDate, UserId, TaskId, AuditStatus)
VALUES
    ('2018-09-19', 'jdoe2', 'R110000021', NULL),
    ('2018-09-19', 'jdoe2', 'R110000022', NULL),
    ('2018-09-19', 'jdoe2', 'R110000023', NULL),
    ('2018-09-19', 'jdoe2', 'R110000024', NULL),
    ('2018-09-19', 'jdoe2', 'R110000025', NULL),
    ('2018-09-19', 'jdoe2', 'R110000026', NULL),
    ('2018-09-19', 'jdoe2', 'R110000027', NULL),
    ('2018-09-19', 'jdoe2', 'R110000028', NULL),
    ('2018-09-19', 'jdoe2', 'R110000029', NULL),
    ('2018-09-19', 'jdoe2', 'R110000030', NULL),
    ('2018-09-19', 'jdoe2', 'R110000031', NULL),
    ('2018-09-19', 'jdoe2', 'R110000032', NULL),
    ('2018-09-19', 'jdoe2', 'R110000033', NULL),
    ('2018-09-19', 'jdoe2', 'R110000034', NULL),
    ('2018-09-19', 'jdoe2', 'R110000035', NULL),
    ('2018-09-19', 'jdoe2', 'R110000036', NULL),
    ('2018-09-19', 'jdoe2', 'R110000037', NULL),
    ('2018-09-19', 'jdoe2', 'R110000038', NULL),
    ('2018-09-19', 'jdoe2', 'R110000039', NULL),
    ('2018-09-19', 'jdoe2', 'R110000040', NULL);
-- Sample data: 20 unaudited tasks for user jdoe1 reviewed on 2018-09-20.
INSERT INTO tbltaskrecord (ReviewDate, UserId, TaskId, AuditStatus)
VALUES
    ('2018-09-20', 'jdoe1', 'R110000041', NULL),
    ('2018-09-20', 'jdoe1', 'R110000042', NULL),
    ('2018-09-20', 'jdoe1', 'R110000043', NULL),
    ('2018-09-20', 'jdoe1', 'R110000044', NULL),
    ('2018-09-20', 'jdoe1', 'R110000045', NULL),
    ('2018-09-20', 'jdoe1', 'R110000046', NULL),
    ('2018-09-20', 'jdoe1', 'R110000047', NULL),
    ('2018-09-20', 'jdoe1', 'R110000048', NULL),
    ('2018-09-20', 'jdoe1', 'R110000049', NULL),
    ('2018-09-20', 'jdoe1', 'R110000050', NULL),
    ('2018-09-20', 'jdoe1', 'R110000051', NULL),
    ('2018-09-20', 'jdoe1', 'R110000052', NULL),
    ('2018-09-20', 'jdoe1', 'R110000053', NULL),
    ('2018-09-20', 'jdoe1', 'R110000054', NULL),
    ('2018-09-20', 'jdoe1', 'R110000055', NULL),
    ('2018-09-20', 'jdoe1', 'R110000056', NULL),
    ('2018-09-20', 'jdoe1', 'R110000057', NULL),
    ('2018-09-20', 'jdoe1', 'R110000058', NULL),
    ('2018-09-20', 'jdoe1', 'R110000059', NULL),
    ('2018-09-20', 'jdoe1', 'R110000060', NULL);
-- Sample data: 20 unaudited tasks for user jdoe2 reviewed on 2018-09-20.
INSERT INTO tbltaskrecord (ReviewDate, UserId, TaskId, AuditStatus)
VALUES
    ('2018-09-20', 'jdoe2', 'R110000061', NULL),
    ('2018-09-20', 'jdoe2', 'R110000062', NULL),
    ('2018-09-20', 'jdoe2', 'R110000063', NULL),
    ('2018-09-20', 'jdoe2', 'R110000064', NULL),
    ('2018-09-20', 'jdoe2', 'R110000065', NULL),
    ('2018-09-20', 'jdoe2', 'R110000066', NULL),
    ('2018-09-20', 'jdoe2', 'R110000067', NULL),
    ('2018-09-20', 'jdoe2', 'R110000068', NULL),
    ('2018-09-20', 'jdoe2', 'R110000069', NULL),
    ('2018-09-20', 'jdoe2', 'R110000070', NULL),
    ('2018-09-20', 'jdoe2', 'R110000071', NULL),
    ('2018-09-20', 'jdoe2', 'R110000072', NULL),
    ('2018-09-20', 'jdoe2', 'R110000073', NULL),
    ('2018-09-20', 'jdoe2', 'R110000074', NULL),
    ('2018-09-20', 'jdoe2', 'R110000075', NULL),
    ('2018-09-20', 'jdoe2', 'R110000076', NULL),
    ('2018-09-20', 'jdoe2', 'R110000077', NULL),
    ('2018-09-20', 'jdoe2', 'R110000078', NULL),
    ('2018-09-20', 'jdoe2', 'R110000079', NULL),
    ('2018-09-20', 'jdoe2', 'R110000080', NULL);

总体:

+-------------+-----------+-----------+-------------+
|   UserId    | 9/19/2018 | 9/20/2018 | Grand Total |
+-------------+-----------+-----------+-------------+
| jdoe1       |        20 |        20 |          40 |
| jdoe2       |        20 |        20 |          40 |
| Grand Total |        40 |        40 |          80 |
+-------------+-----------+-----------+-------------+

样本:

+-------------+-----------+-----------+-------------+
|   UserId    | 9/19/2018 | 9/20/2018 | Grand Total |
+-------------+-----------+-----------+-------------+
| jdoe1       |         2 |         2 |           4 |
| jdoe2       |         2 |         2 |           4 |
| Grand Total |         4 |         4 |           8 |
+-------------+-----------+-----------+-------------+

这就是我想要获得的:

+------------+--------+------------+-------------+
| ReviewDate | UserId |   TaskId   | AuditStatus |
+------------+--------+------------+-------------+
| 2018-09-19 | jdoe1  | R110000008 | Check       |
| 2018-09-19 | jdoe1  | R110000020 | Check       |
| 2018-09-19 | jdoe2  | R110000029 | Check       |
| 2018-09-19 | jdoe2  | R110000037 | Check       |
| 2018-09-20 | jdoe1  | R110000052 | Check       |
| 2018-09-20 | jdoe1  | R110000057 | Check       |
| 2018-09-20 | jdoe2  | R110000070 | Check       |
| 2018-09-20 | jdoe2  | R110000074 | Check       |
+------------+--------+------------+-------------+

这是我尝试过的查询:

-- First attempt: keep a row whenever its own RAND() draw is below 0.10.
-- Every row is tested on its own, so neither the total row count nor the
-- number of rows per (ReviewDate, UserId) group is fixed.
SELECT ReviewDate
     , UserId
     , TaskId
     , AuditStatus
  FROM tbltaskrecord
 WHERE RAND() < 0.10

但在 4 次试验中,它分别返回了 5、6、9 和 8 行。下面是最后一次试验的输出:jdoe1 只有 2 行,而且没有一行来自 2018-09-19。

+------------+--------+-------------+-------------+
| ReviewDate | UserId |   TaskId    | AuditStatus |
+------------+--------+-------------+-------------+
| 2018-09-20 |  jdoe1 |  R110000043 |  NULL       |
| 2018-09-20 |  jdoe1 |  R110000052 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000022 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000028 |  NULL       |
| 2018-09-19 |  jdoe2 |  R110000031 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000062 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000064 |  NULL       |
| 2018-09-20 |  jdoe2 |  R110000080 |  NULL       |
+------------+--------+-------------+-------------+

我该怎么做?

标签: mysql, stored-procedures

解决方案


这个解决方案游走在“取巧(hack)”的边缘——至于它是否真的越过了那条线,就不是我能判断的了……

无论如何,无论好坏,这里有一个想法......

-- Pick a random 10% of the rows inside every (ReviewDate, UserId) group.
--
-- Building blocks:
--   x : every row, shuffled inside its (ReviewDate, UserId) group.
--   a : the same rows plus i = 1, 2, 3, ... restarting at each new group
--       (a ROW_NUMBER() emulation built on user variables).
--   b : the quota per group, COUNT(*)/10.
--   The final join keeps the rows whose position i fits within the quota.
--
-- Caveats:
--   * This relies on the select list being evaluated left to right, which
--     MySQL does not guarantee for expressions that read and assign user
--     variables. Treat it as a hack for servers without window functions;
--     on MySQL 8.0+ prefer
--     ROW_NUMBER() OVER (PARTITION BY ReviewDate, UserId ORDER BY RAND()).
--   * The quota is not rounded: a group of 25 rows yields 2 rows, and a
--     group with fewer than 10 rows yields none.
--   * The result listing that follows this query was produced by an earlier
--     variant that grouped by UserId only, so its per-date split does not
--     reflect this version.
SELECT a.ReviewDate
     , a.UserId
     , a.TaskId
     , a.AuditStatus
  FROM
     ( SELECT x.ReviewDate
            , x.UserId
            , x.TaskId
            , x.AuditStatus
              -- Must come before the @prev_* assignments: it compares the
              -- current row with the group key remembered from the previous row.
            , CASE WHEN @prev_date = x.ReviewDate AND @prev_user = x.UserId
                   THEN @i := @i + 1
                   ELSE @i := 1
               END AS i
            , @prev_date := x.ReviewDate AS prev_date
            , @prev_user := x.UserId     AS prev_user
         FROM
            ( SELECT ReviewDate
                   , UserId
                   , TaskId
                   , AuditStatus
                FROM tbltaskrecord
               ORDER
                  BY ReviewDate
                   , UserId
                   , RAND()
                  -- A LIMIT keeps the optimizer from merging this derived table
                  -- into the outer block, which would discard the ORDER BY.
               LIMIT 18446744073709551615
            ) x
        CROSS
         JOIN (SELECT @prev_date := NULL, @prev_user := NULL, @i := 0) vars
        ORDER
           BY x.ReviewDate
            , x.UserId
            , i
     ) a
 INNER
  JOIN
     ( SELECT ReviewDate
            , UserId
            , COUNT(*)/10 AS pct
         FROM tbltaskrecord
        GROUP
           BY ReviewDate
            , UserId
     ) b
    ON b.ReviewDate = a.ReviewDate
   AND b.UserId = a.UserId
   AND b.pct >= a.i;

+------------+--------+------------+-------------+
| ReviewDate | UserId | TaskId     | AuditStatus |
+------------+--------+------------+-------------+
| 2018-09-20 | jdoe1  | R110000046 | NULL        |
| 2018-09-20 | jdoe1  | R110000042 | NULL        |
| 2018-09-19 | jdoe1  | R110000012 | NULL        |
| 2018-09-19 | jdoe1  | R110000016 | NULL        |
| 2018-09-20 | jdoe2  | R110000077 | NULL        |
| 2018-09-19 | jdoe2  | R110000034 | NULL        |
| 2018-09-19 | jdoe2  | R110000022 | NULL        |
| 2018-09-19 | jdoe2  | R110000026 | NULL        |
+------------+--------+------------+-------------+
8 rows in set (0.01 sec)

SqlFiddle http://sqlfiddle.com/#!9/bd3256/1


推荐阅读