首页 > 解决方案 > Mysql 使用 group by order by 性能缓慢

问题描述

我正在使用 Mysql 5.7 我有一个有 7006500 行的表。我的查询执行分组并获取具有最大计数的行,列上的每个组都已编入索引但仍需要时间来执行。下面是我的查询、执行计划和表模式。

表架构

        CREATE TABLE templog (
          id bigint(20) unsigned NOT NULL AUTO_INCREMENT,
          userid bigint(12) unsigned NOT NULL,
          type tinyint(3) NOT NULL DEFAULT '0',
          os tinyint(4) NOT NULL DEFAULT '0',
          day date DEFAULT NULL,
          activetime smallint(5) unsigned NOT NULL DEFAULT '0',
          createdat datetime NOT NULL DEFAULT CURRENT_TIMESTAMP,
          timegroupid tinyint(4) NOT NULL DEFAULT '0',
          PRIMARY KEY (`id`),
          KEY templog_type_IDX (`type`,`day`,`userid`,`timegroupid`) USING BTREE
        ) ENGINE=InnoDB AUTO_INCREMENT=7006500 DEFAULT CHARSET=utf8;

我的查询:-

SELECT  SQL_NO_CACHE y.userid, y.timegroupid as besttime,y.cnt
    FROM (
        SELECT  @row_number := CASE WHEN @userid=x.userid THEN @row_number+1 ELSE 1 END AS row_number ,
                @userid := x.userid AS userid ,x.cnt,x.timegroupid
            FROM (
                SELECT  userid, timegroupid ,COUNT(userid) as cnt
                    from  templog
                    where  type = 3
                      AND  day BETWEEN '2020-01-01' AND '2020-01-20'
                      AND  userid < 771267
                    GROUP by  userid, timegroupid
                    ORDER by  userid DESC ,cnt DESC 
                  ) x,
            ( SELECT  @row_number:=0, @userid:='') AS t 
          ) y
    where  y.row_number = 1
    ORDER by  y.userid DESC
    LIMIT  1000;

查询解释格式:

                {
              "query_block": {
                "select_id": 1,
                "cost_info": {
                  "query_cost": "12.00"
                },
                "ordering_operation": {
                  "using_filesort": true,
                  "table": {
                    "table_name": "y",
                    "access_type": "ref",
                    "possible_keys": [
                      "<auto_key0>"
                    ],
                    "key": "<auto_key0>",
                    "used_key_parts": [
                      "row_number"
                    ],
                    "key_length": "9",
                    "ref": [
                      "const"
                    ],
                    "rows_examined_per_scan": 10,
                    "rows_produced_per_join": 10,
                    "filtered": "100.00",
                    "cost_info": {
                      "read_cost": "10.00",
                      "eval_cost": "2.00",
                      "prefix_cost": "12.00",
                      "data_read_per_join": "320"
                    },
                    "used_columns": [
                      "row_number",
                      "userid",
                      "cnt",
                      "timegroupid"
                    ],
                    "attached_condition": "((`y`.`row_number` <=> 1))",
                    "materialized_from_subquery": {
                      "using_temporary_table": true,
                      "dependent": false,
                      "cacheable": true,
                      "query_block": {
                        "select_id": 2,
                        "cost_info": {
                          "query_cost": "6441.25"
                        },
                        "nested_loop": [
                          {
                            "table": {
                              "table_name": "t",
                              "access_type": "system",
                              "rows_examined_per_scan": 1,
                              "rows_produced_per_join": 1,
                              "filtered": "100.00",
                              "cost_info": {
                                "read_cost": "0.00",
                                "eval_cost": "0.20",
                                "prefix_cost": "0.00",
                                "data_read_per_join": "16"
                              },
                              "used_columns": [
                                "@row_number:=0",
                                "@userid:=''"
                              ],
                              "materialized_from_subquery": {
                                "using_temporary_table": true,
                                "dependent": false,
                                "cacheable": true,
                                "query_block": {
                                  "select_id": 4,
                                  "message": "No tables used"
                                }
                              }
                            }
                          },
                          {
                            "table": {
                              "table_name": "x",
                              "access_type": "ALL",
                              "rows_examined_per_scan": 25725,
                              "rows_produced_per_join": 25725,
                              "filtered": "100.00",
                              "cost_info": {
                                "read_cost": "1296.25",
                                "eval_cost": "5145.00",
                                "prefix_cost": "6441.25",
                                "data_read_per_join": "602K"
                              },
                              "used_columns": [
                                "userid",
                                "timegroupid",
                                "cnt"
                              ],
                              "materialized_from_subquery": {
                                "using_temporary_table": true,
                                "dependent": false,
                                "cacheable": true,
                                "query_block": {
                                  "select_id": 3,
                                  "cost_info": {
                                    "query_cost": "140807.11"
                                  },
                                  "ordering_operation": {
                                    "using_filesort": true,
                                    "grouping_operation": {
                                      "using_temporary_table": true,
                                      "using_filesort": false,
                                      "table": {
                                        "table_name": "templog",
                                        "access_type": "range",
                                        "possible_keys": [
                                          "templog_type_IDX"
                                        ],
                                        "key": "templog_type_IDX",
                                        "used_key_parts": [
                                          "type",
                                          "day"
                                        ],
                                        "key_length": "13",
                                        "rows_examined_per_scan": 694718,
                                        "rows_pr
            oduced_per_join": 25725,
                                        "filtered": "33.33",
                                        "using_index": true,
                                        "cost_info": {
                                          "read_cost": "1863.51",
                                          "eval_cost": "5145.03",
                                          "prefix_cost": "140807.11",
                                          "data_read_per_join": "803K"
                                        },
                                        "used_columns": [
                                          "id",
                                          "userid",
                                          "type",
                                          "day",
                                          "timegroupid"
                                        ],
                                        "attached_condition": "((`templog`.`type` = 3) and (`templog`.`day` between '2020-01-01' and '2020-01-20') and (`templog`.`userid` < 771267))"
                                      }
                                    }
                                  }
                                }
                              }
                            }
                          }
                        ]
                      }
                    }
                  }
                }
              }
            }

是否有其他方法可以优化查询或更改索引顺序或以另一种方式重写查询以获得更好的性能?

标签: mysqlperformancegroup-bygreatest-n-per-grouptemp-tables

解决方案


  1. 不要指望@variables 像您期望的那样工作。我认为下一个版本开始禁止它们。

  2. 优化器可以随意丢弃ORDER BY派生表中的 。这将导致错误的结果。对子LIMIT查询进行大数据处理可能会阻止这种情况。

  3. 建立和维护一个“汇总表”。这可以显着加快此查询和类似查询的速度。

    CREATE TABLE Summary (
        userid ...,
        timegroupid ...,
        type ...,
        day ...,
        cnt SMALLINT UNSIGNED NOT NULL,  -- COUNT(*)
        tottime INT UNSIGNED NOT NULL,   -- SUM(activetime)
        PRIMARY KEY(timegroupid, userid, type, day)
    

但是,如果没有更好地了解数据,我无法预测此表是否会明显小于原始表。如果它明显更小,则此汇总表将不实用。

  1. 我添加了另一个标签——关注它以获取更多关于 groupwise-max 的讨论。

推荐阅读