首页 > 解决方案 > 基于值聚合对行进行分组

问题描述

我正在尝试编写一个查询,其目的是将多个顺序行分组以供将来处理。这种分组的规则是:

这是一个例子:

SET NOCOUNT ON;

DROP TABLE IF EXISTS #tmpIncoming;

CREATE TABLE #tmpIncoming
(
  [Segment] int NOT NULL,
  [Weight]  int NOT NULL
);

INSERT INTO #tmpIncoming VALUES
( 1,  25),
( 2,  45),
( 3,  20),
( 4,  30),
( 5,  50),
( 6,  21),
( 7, 110);

DECLARE @nMaxChunkSize int = 100;

-- BEGIN: suboptimal
DROP TABLE IF EXISTS #tmpResult;

CREATE TABLE #tmpResult
(
    [MinSegment] int NOT NULL,
    [MaxSegment] int NOT NULL,
    [Weight]   int NOT NULL
);

DECLARE cur CURSOR LOCAL READ_ONLY FORWARD_ONLY STATIC FOR
SELECT
    [Segment],
    [Weight]
FROM
    #tmpIncoming
ORDER BY
    [Segment];

OPEN cur;

DECLARE @nMinSegment int = 0, @nMaxSegment int = 0;
DECLARE @nWeightSoFar int = 0;

WHILE (1=1)
BEGIN
    DECLARE @nSegment int, @nWeight int;
    FETCH NEXT FROM cur INTO @nSegment, @nWeight;

    IF (@@FETCH_STATUS <> 0)
        BREAK;

    IF (@nWeightSoFar + @nWeight > @nMaxChunkSize)
    BEGIN
        INSERT INTO #tmpResult ([MinSegment], [MaxSegment], [Weight])
        VALUES (@nMinSegment, @nMaxSegment, @nWeightSoFar);

        SET @nMinSegment = @nSegment;
        SET @nMaxSegment = @nSegment;
        SET @nWeightSoFar = @nWeight;
    END
    ELSE
    BEGIN
        IF (@nMinSegment = 0)
            SET @nMinSegment = @nSegment;
        SET @nMaxSegment = @nSegment;
        SET @nWeightSoFar = @nWeightSoFar + @nWeight;
    END;
END;

CLOSE cur;
DEALLOCATE cur;

IF (@nWeightSoFar > 0)
    INSERT INTO #tmpResult ([MinSegment], [MaxSegment], [Weight])
    VALUES (@nMinSegment, @nMaxSegment, @nWeightSoFar);

SELECT * FROM #tmpResult;

DROP TABLE IF EXISTS #tmpResult;
-- END: suboptimal

DROP TABLE IF EXISTS #tmpIncoming;

我只能想到一个使用游标变量的次优实现。谁能推荐一种更好的方法,最好只有一个 SELECT 和一些 CTE?

标签: sqlsql-servertsql

解决方案


您可以使用递归按顺序循环权重,并在超过块大小时立即重置。

DECLARE @nMaxChunkSize int = 100;

;WITH x AS 
(
  SELECT Segment, 
         Weight, 
         rn = ROW_NUMBER() OVER (ORDER BY Segment)
    FROM #tmpIncoming
),
cte AS 
(
  SELECT Segment, Weight, rn, total = Weight, flip = 0 
  FROM x 
  WHERE rn = 1
  
  UNION ALL

  SELECT x.Segment, x.Weight, x.rn, total = CASE 
    WHEN x.Weight + cte.Total > @nMaxChunkSize 
    THEN x.Weight ELSE x.Weight + cte.Total END,
    flip = flip + CASE 
    WHEN x.Weight + cte.Total > @nMaxChunkSize 
    THEN 1 ELSE 0 END
  FROM x JOIN cte  
  ON x.rn = cte.rn + 1
)
SELECT MinSegment = MIN(Segment), 
       MaxSegment = MAX(Segment),
       Weight     = MAX(total)
FROM cte
GROUP BY flip
ORDER BY MinSegment
OPTION (MAXRECURSION 0);

结果:

最小段 最大段 重量
1 3 90
4 5 80
6 6 21
7 7 110

另一种产生相同结果但可能更容易分解/遵循的方法(尽管可以说代码很快变得与原始代码一样冗长):

DECLARE @nMaxChunkSize int = 100;

;WITH x AS 
(
  SELECT Segment, 
         Weight, 
         rn = ROW_NUMBER() OVER (ORDER BY Segment)
    FROM #tmpIncoming
),
cte AS 
(
  SELECT Segment, Weight, rn, Total = Weight
  FROM x 
  WHERE rn = 1
  
  UNION ALL

  SELECT x.Segment, x.Weight, x.rn, Total = CASE 
    WHEN x.Weight + cte.Total > @nMaxChunkSize 
    THEN x.Weight ELSE x.Weight + cte.Total END
  FROM x JOIN cte ON x.rn = cte.rn + 1
)
SELECT MinSegment = MIN(Segment), 
       MaxSegment = MAX(Segment),
       Weight     = MAX(Total)
FROM 
(
  SELECT Segment, Total, 
    NewGroup = SUM(CASE WHEN Weight = Total THEN 1 ELSE 0 END) 
    OVER (ORDER BY Segment ROWS UNBOUNDED PRECEDING) FROM cte
) AS y
GROUP BY NewGroup
ORDER BY MinSegment
OPTION (MAXRECURSION 0);

推荐阅读