views:

59

answers:

4

I need to group a set of rows based on the Category column, and also limit the combined rows based on the SUM(Number) column to be less than or equal to the @Limit value.

For each distinct Category column I need to identify "buckets" that are <=@limit. If the SUM(Number) of all the rows for a Category column are <=@Limit then there will be only 1 bucket for that Category value (like 'CCCC' in the sample data). However if the SUM(Number)>@limit, then there will be multiple bucket rows for that Category value (like 'AAAA' in the sample data), and each bucket must be <=@Limit. There can be as many buckets as necessary. Also, look at Category value 'DDDD', its one row is greater than @Limit all by itself, and gets split into two rows in the result set.

Given this simplified data:

DECLARE @Detail table (DetailID int  primary key, Category char(4), Number int)
SET NOCOUNT ON
INSERT @Detail VALUES ( 1, 'AAAA',100)
INSERT @Detail VALUES ( 2, 'AAAA', 50)
INSERT @Detail VALUES ( 3, 'AAAA',300)
INSERT @Detail VALUES ( 4, 'AAAA',200)
INSERT @Detail VALUES ( 5, 'BBBB',500)
INSERT @Detail VALUES ( 6, 'CCCC',200)
INSERT @Detail VALUES ( 7, 'CCCC',100)
INSERT @Detail VALUES ( 8, 'CCCC', 50)
INSERT @Detail VALUES ( 9, 'DDDD',800)
INSERT @Detail VALUES (10, 'EEEE',100)
INSERT @Detail VALUES (11, 'AAAA',200) --EDIT added
INSERT @Detail VALUES (12, 'AAAA',200) --EDIT added
INSERT @Detail VALUES (13, 'AAAA',200) --EDIT added
INSERT @Detail VALUES (14, 'AAAA',200) --EDIT added
SET NOCOUNT OFF

DECLARE @Limit int
SET @Limit=500

I need one of these result set:

DetailID  Bucket  |    DetailID  Category Bucket
--------  ------  |    --------  -------- ------
 1        1       |     1        'AAAA'   1     
 2        1       |     2        'AAAA'   1     
 3        1       |     3        'AAAA'   1     
 4        2       |     4        'AAAA'   2     
11        2       |    11        'AAAA'   2      --EDIT added
12        3       |    12        'AAAA'   3      --EDIT added
13        3       |    13        'AAAA'   3      --EDIT added
14        4       |    14        'AAAA'   4      --EDIT added
 5        5       OR    5        'BBBB'   1     
 6        6       |     6        'CCCC'   1     
 7        6       |     7        'CCCC'   1     
 8        6       |     8        'CCCC'   1     
 9        7       |     9        'DDDD'   1     
 9        8       |     9        'DDDD'   2     
10        9       |    10        'EEEE'   1   

EDIT after trying out all the answers

With all attempts at a set based solution not working as needed, I'm going with a modification to @GalacticJello Answer, modification noted in the code below. I basically find all the rows where the entire category fits into the bucket and INSERT them using a single INSERT-SELECT, and then loop over the remaining data using @GalacticJello cursor free loop. This will work fine in my situation since there will hardly ever be any rows processed by the loop.

DECLARE @DetailTemp table (PID INT IDENTITY(1,1), DetailID int  primary key, Category char(4), Number int) 
DECLARE @DetailFinal table (DetailID int, Category char(4), Bucket int) ---<<<renamed column to Bucket

DECLARE @DetailCount int
SET @DetailCount = 0;

--------<<<optimization added starts here
;WITH AllSingleBuckets AS (
    SELECT
        Category
        FROM @Detail
        GROUP BY Category
        HAVING SUM(Number)<=@Limit

)
INSERT INTO @DetailFinal
        (DetailID, Category, Bucket)
    SELECT
        d.DetailID,d.Category,1
        FROM @Detail                    d
            INNER JOIN AllSingleBuckets s ON d.Category=s.Category
--------<<<optimization added ends here

INSERT @DetailTemp
--------<<<changed for optimization, added WHERE clause
SELECT d.DetailId, d.Category, d.Number FROM @Detail d WHERE NOT EXISTS (SELECT 1 FROM @DetailFinal f WHERE d.Category=f.Category) ORDER BY Category, DetailId
SELECT @DetailCount = @@ROWCOUNT

DECLARE @CurrentPid int
SET @CurrentPid = 1

DECLARE @ThisId int
DECLARE @ThisCategory char(4)
DECLARE @ThisNumber int

DECLARE @CurrentCategory char(4)
DECLARE @CurrentSum INT
DECLARE @CurrentBucket INT


WHILE @CurrentPid <= @DetailCount
BEGIN
    SELECT @ThisId = DetailId, @ThisCategory = Category, @ThisNumber = Number
    FROM @DetailTemp 
    WHERE PID = @CurrentPid

    IF @ThisCategory = @CurrentCategory
    BEGIN
        IF @CurrentSum + @ThisNumber > @Limit
        BEGIN
            SET @CurrentBucket = @CurrentBucket + 1
            SET @CurrentSum = @ThisNumber
        END
        ELSE
        BEGIN
            SET @CurrentSum = @CurrentSum + @ThisNumber
        END
    END
    ELSE
    BEGIN
        SET @CurrentBucket = 1
        SET @CurrentCategory = @ThisCategory
        SET @CurrentSum = @ThisNumber
    END

    WHILE @CurrentSum > @Limit
    BEGIN
        INSERT @DetailFinal SELECT @ThisId, @CurrentCategory, @CurrentBucket
        SET @CurrentBucket = @CurrentBucket + 1
        SET @CurrentSum = @CurrentSum - @Limit
    END

    INSERT @DetailFinal SELECT @ThisId, @CurrentCategory, @CurrentBucket

    SET @CurrentPid = @CurrentPid + 1
END


SELECT * from @DetailFinal ORDER BY Category --------<<<added order by

OUTPUT:

DetailID    Category Bucket
----------- -------- -----------
1           AAAA     1
2           AAAA     1
3           AAAA     1
4           AAAA     2
11          AAAA     2
12          AAAA     3
13          AAAA     3
14          AAAA     4
5           BBBB     1
6           CCCC     1
7           CCCC     1
8           CCCC     1
9           DDDD     1
9           DDDD     2
10          EEEE     1

(15 row(s) affected)
+1  A: 

Maybe the following will be useful for you (it doesn't produce 2 rows for 'DDDD' though; I'm not sure if you can do it without inserting 2 different rows. )

select detailId, category,
FLOOR((SELECT sum(Number)
from Detail where category=t2.category and detailId <= t2.detailId
)/501)+1 as bucket
from Detail t2
order by detailId;
a1ex07
+1  A: 

You need to keep a running total to know when @Limit is reached. Of course, CROSS APPLY may not scale (and depends on indexes too).

Edit: fixed DDDD, bucket 1

;WITH cRunning AS
(
    SELECT
        D1.DetailID, D1.Category, D3.RunningTotal, D3.GroupCount
    FROM
        @Detail D1
        CROSS APPLY
        (SELECT
             Category, COUNT(*) AS GroupCount,
             CAST(SUM(Number) AS int) AS RunningTotal
        FROM @Detail D2
        WHERE D1.Category = D2.Category AND D1.DetailID >= D2.DetailID
        GROUP BY D2.Category) D3
)
SELECT
    DetailID, Category,
    RunningTotal / @Limit + 1 AS Bucket --abuse integer math
FROM
    cRunning
UNION ALL
SELECT --singletons > @Limit
    DetailID, Category, 1
FROM
    cRunning
WHERE
    GroupCount = 1 AND RunningTotal > @Limit
ORDER BY
    Category, DetailID, Bucket

Of course, my first answer works if you throw in a dummy zero row for DDDD

...
INSERT @Detail VALUES ( xxx, 'DDDD',0)
...
SELECT
    D1.DetailID, D1.Category,
    RunningTotal / @Limit + 1 AS Bucket --abuse integer math
FROM
    @Detail D1
    CROSS APPLY
    (SELECT SUM(Number) AS RunningTotal
    FROM @Detail D2
    WHERE D1.Category = D2.Category AND D1.DetailID >= D2.DetailID
    GROUP BY D2.Category) D3
gbn
Couldn't you use a `HAVING` clause to figure out when @Limit is reached in this situation?
Abe Miessler
Abe Miessler: HAVING would work on the SUM for the group, but you need to allocate buckets within the group
gbn
+1  A: 

FINALLY!

I found a couple of bugs with my code, corrected those and now I have this working through a CTE. I thought that if a detail spanned multiple buckets then it would always be split between them. It looks like you now want those bigger than a bucket to span multiple buckets, but other details to be pushed ahead completely to the next bucket. You realize that in this case you could end up with a 50 in a bucket all by itself, right? If the next detail were 500 then it would be pushed forward and the 50 gets a bucket all to itself - roomy!

Anyway, just including the code here as a fully set-based solution in case anyone is interested:

;WITH sequence_ids AS (SELECT DetailID, Category, Number, ROW_NUMBER() OVER (PARTITION BY Category ORDER BY DetailID) AS sequence_id FROM @Detail),
main_cte AS (
    SELECT
        D1.DetailID,
        D1.Category,
        D1.Number,
        CASE WHEN @Limit > D1.Number THEN @Limit - D1.Number ELSE 0 END AS RemainingBucket,
        CASE WHEN D1.Number > @Limit THEN D1.Number - @Limit ELSE 0 END AS RemainingDetail,
        D1.sequence_id,
        1 AS bucket
    FROM
        sequence_ids D1
    WHERE
        sequence_id = 1
    UNION ALL
    SELECT
        D2.DetailID,
        D2.Category,
        D2.Number,
        CASE WHEN COALESCE(NULLIF(RemainingBucket, 0), @Limit) > COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) THEN COALESCE(NULLIF(RemainingBucket, 0), @Limit) - COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) ELSE 0 END AS RemainingBucket,
        CASE WHEN COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) > COALESCE(NULLIF(RemainingBucket, 0), @Limit) THEN COALESCE(NULLIF(main_cte.RemainingDetail, 0), D2.Number) - COALESCE(NULLIF(RemainingBucket, 0), @Limit) ELSE 0 END AS RemainingDetail,
        D2.sequence_id,
        CASE WHEN RemainingBucket = 0 THEN bucket + 1 ELSE bucket END
    FROM
        main_cte
    INNER JOIN sequence_ids D2 ON
        D2.Category = main_cte.Category AND
        ((main_cte.RemainingDetail > 0 AND D2.DetailID = main_cte.DetailID) OR
         (main_cte.RemainingDetail <= 0 AND D2.sequence_id = main_cte.sequence_id + 1))
)
SELECT
    *
FROM
    main_cte
ORDER BY
    Category,
    bucket,
    sequence_id
Tom H.
+1  A: 
DECLARE @Detail table (DetailID int  primary key, Category char(4), Number int) 
SET NOCOUNT ON 
INSERT @Detail VALUES ( 1, 'AAAA',100) 
INSERT @Detail VALUES ( 2, 'AAAA', 50) 
INSERT @Detail VALUES ( 3, 'AAAA',300) 
INSERT @Detail VALUES ( 4, 'AAAA',200) 
INSERT @Detail VALUES ( 5, 'BBBB',500) 
INSERT @Detail VALUES ( 6, 'CCCC',200) 
INSERT @Detail VALUES ( 7, 'CCCC',100) 
INSERT @Detail VALUES ( 8, 'CCCC', 50) 
INSERT @Detail VALUES ( 9, 'DDDD',800) 
INSERT @Detail VALUES (10, 'EEEE',100) 
INSERT @Detail VALUES (11, 'AAAA',200) --EDIT added 
INSERT @Detail VALUES (12, 'AAAA',200) --EDIT added 
INSERT @Detail VALUES (13, 'AAAA',200) --EDIT added 
INSERT @Detail VALUES (14, 'AAAA',200) --EDIT added 
SET NOCOUNT OFF 

DECLARE @Limit int 
SET @Limit=500 

DECLARE @DetailTemp table (PID INT IDENTITY(1,1), DetailID int  primary key, Category char(4), Number int) 
DECLARE @DetailFinal table (DetailID int, Category char(4), Number int) 

DECLARE @DetailCount int
SET @DetailCount = 0;

INSERT @DetailTemp
SELECT DetailId, Category, Number FROM @Detail ORDER BY Category, DetailId
SELECT @DetailCount = @@ROWCOUNT

DECLARE @CurrentPid int
SET @CurrentPid = 1

DECLARE @ThisId int
DECLARE @ThisCategory char(4)
DECLARE @ThisNumber int

DECLARE @CurrentCategory char(4)
DECLARE @CurrentSum INT
DECLARE @CurrentBucket INT


WHILE @CurrentPid <= @DetailCount
BEGIN
    SELECT @ThisId = DetailId, @ThisCategory = Category, @ThisNumber = Number
    FROM @DetailTemp 
    WHERE PID = @CurrentPid

    IF @ThisCategory = @CurrentCategory
    BEGIN
        IF @CurrentSum + @ThisNumber > @Limit
        BEGIN
            SET @CurrentBucket = @CurrentBucket + 1
            SET @CurrentSum = @ThisNumber
        END
        ELSE
        BEGIN
            SET @CurrentSum = @CurrentSum + @ThisNumber
        END
    END
    ELSE
    BEGIN
        SET @CurrentBucket = 1
        SET @CurrentCategory = @ThisCategory
        SET @CurrentSum = @ThisNumber
    END

    WHILE @CurrentSum > @Limit
    BEGIN
        INSERT @DetailFinal SELECT @ThisId, @CurrentCategory, @CurrentBucket
        SET @CurrentBucket = @CurrentBucket + 1
        SET @CurrentSum = @CurrentSum - @Limit
END

    INSERT @DetailFinal SELECT @ThisId, @CurrentCategory, @CurrentBucket

    SET @CurrentPid = @CurrentPid + 1
END


SELECT * from @DetailFinal
GalacticJello