mysql - Mysql 使用 group by order by 性能缓慢
问题描述
我正在使用 Mysql 5.7 我有一个有 7006500 行的表。我的查询执行分组并获取具有最大计数的行,列上的每个组都已编入索引但仍需要时间来执行。下面是我的查询、执行计划和表模式。
表架构
CREATE TABLE templog (
id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
userid bigint(12) unsigned NOT NULL,
type tinyint(3) NOT NULL DEFAULT '0',
os tinyint(4) NOT NULL DEFAULT '0',
day date DEFAULT NULL,
activetime smallint(5) unsigned NOT NULL DEFAULT '0',
createdat datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
timegroupid tinyint(4) NOT NULL DEFAULT '0',
PRIMARY KEY (`id`),
KEY templog_type_IDX (`type`,`day`,`userid`,`timegroupid`) USING BTREE
) ENGINE=InnoDB AUTO_INCREMENT=7006500 DEFAULT CHARSET=utf8;
我的查询:-
SELECT SQL_NO_CACHE y.userid, y.timegroupid as besttime,y.cnt
FROM (
SELECT @row_number := CASE WHEN @userid=x.userid THEN @row_number+1 ELSE 1 END AS row_number ,
@userid := x.userid AS userid ,x.cnt,x.timegroupid
FROM (
SELECT userid, timegroupid ,COUNT(userid) as cnt
from templog
where type = 3
AND day BETWEEN '2020-01-01' AND '2020-01-20'
AND userid < 771267
GROUP by userid, timegroupid
ORDER by userid DESC ,cnt DESC
) x,
( SELECT @row_number:=0, @userid:='') AS t
) y
where y.row_number = 1
ORDER by y.userid DESC
LIMIT 1000;
查询解释格式:
{
"query_block": {
"select_id": 1,
"cost_info": {
"query_cost": "12.00"
},
"ordering_operation": {
"using_filesort": true,
"table": {
"table_name": "y",
"access_type": "ref",
"possible_keys": [
"<auto_key0>"
],
"key": "<auto_key0>",
"used_key_parts": [
"row_number"
],
"key_length": "9",
"ref": [
"const"
],
"rows_examined_per_scan": 10,
"rows_produced_per_join": 10,
"filtered": "100.00",
"cost_info": {
"read_cost": "10.00",
"eval_cost": "2.00",
"prefix_cost": "12.00",
"data_read_per_join": "320"
},
"used_columns": [
"row_number",
"userid",
"cnt",
"timegroupid"
],
"attached_condition": "((`y`.`row_number` <=> 1))",
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 2,
"cost_info": {
"query_cost": "6441.25"
},
"nested_loop": [
{
"table": {
"table_name": "t",
"access_type": "system",
"rows_examined_per_scan": 1,
"rows_produced_per_join": 1,
"filtered": "100.00",
"cost_info": {
"read_cost": "0.00",
"eval_cost": "0.20",
"prefix_cost": "0.00",
"data_read_per_join": "16"
},
"used_columns": [
"@row_number:=0",
"@userid:=''"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 4,
"message": "No tables used"
}
}
}
},
{
"table": {
"table_name": "x",
"access_type": "ALL",
"rows_examined_per_scan": 25725,
"rows_produced_per_join": 25725,
"filtered": "100.00",
"cost_info": {
"read_cost": "1296.25",
"eval_cost": "5145.00",
"prefix_cost": "6441.25",
"data_read_per_join": "602K"
},
"used_columns": [
"userid",
"timegroupid",
"cnt"
],
"materialized_from_subquery": {
"using_temporary_table": true,
"dependent": false,
"cacheable": true,
"query_block": {
"select_id": 3,
"cost_info": {
"query_cost": "140807.11"
},
"ordering_operation": {
"using_filesort": true,
"grouping_operation": {
"using_temporary_table": true,
"using_filesort": false,
"table": {
"table_name": "templog",
"access_type": "range",
"possible_keys": [
"templog_type_IDX"
],
"key": "templog_type_IDX",
"used_key_parts": [
"type",
"day"
],
"key_length": "13",
"rows_examined_per_scan": 694718,
"rows_pr
oduced_per_join": 25725,
"filtered": "33.33",
"using_index": true,
"cost_info": {
"read_cost": "1863.51",
"eval_cost": "5145.03",
"prefix_cost": "140807.11",
"data_read_per_join": "803K"
},
"used_columns": [
"id",
"userid",
"type",
"day",
"timegroupid"
],
"attached_condition": "((`templog`.`type` = 3) and (`templog`.`day` between '2020-01-01' and '2020-01-20') and (`templog`.`userid` < 771267))"
}
}
}
}
}
}
}
]
}
}
}
}
}
}
是否有其他方法可以优化查询或更改索引顺序或以另一种方式重写查询以获得更好的性能?
解决方案
不要指望@variables 像您期望的那样工作。我认为下一个版本开始禁止它们。
优化器可以随意丢弃
ORDER BY
派生表中的 。这将导致错误的结果。对子LIMIT
查询进行大数据处理可能会阻止这种情况。建立和维护一个“汇总表”。这可以显着加快此查询和类似查询的速度。
CREATE TABLE Summary ( userid ..., timegroupid ..., type ..., day ..., cnt SMALLINT UNSIGNED NOT NULL, -- COUNT(*) tottime INT UNSIGNED NOT NULL, -- SUM(activetime) PRIMARY KEY(timegroupid, userid, type, day)
但是,如果没有更好地了解数据,我无法预测此表是否会明显小于原始表。如果它明显更小,则此汇总表将不实用。
- 我添加了另一个标签——关注它以获取更多关于 groupwise-max 的讨论。
推荐阅读
- java - 如何修复:无法为类 X 调用无参数构造函数:使用 Gson 为该类型注册 InstanceCreator 可能会解决此问题
- firebase - Firebase响应中ETag的含义
- tomcat - Oracle Apex 版本 4.2.0.00.27 升级到 19.1 - 旧应用程序没有 CSS,新应用程序没问题
- jquery - 选择不同的单选按钮后如何获得不同的输出?
- javascript - Three.JS 错误 - 纹理不是二的幂/无法更改纹理
- angularjs - Froala Angularjs 添加自定义按钮
- javascript - 使用 Chrome 扩展程序发送会话 Cookie
- hana - SAP Cloud Platform 试用版 - 无法打开 HDI 容器
- python - 如何在python中使用修改后的输出大小绘制混淆矩阵并输出为.svg图像?
- webpack - 尝试设置 Webpack 时如何解决“错误:您需要提供目标平台”