Hi,
my question fits in to your "Balanced sets in SQL" collection of questions.
I have a table as follows (in reality 26 million rows):
CREATE TABLE T (
"PI" VARCHAR2(120),
"S" NUMBER,
"L" NUMBER
);
Insert into T (PI,S,L) values ('000740A0','1','54');
Insert into T (PI,S,L) values ('000740A0','2','103');
Insert into T (PI,S,L) values ('000740A0','3','102');
Insert into T (PI,S,L) values ('000740A0','4','102');
Insert into T (PI,S,L) values ('000740A0','5','173');
Insert into T (PI,S,L) values ('000740A0','6','174');
Insert into T (PI,S,L) values ('000740A0','7','120');
Insert into T (PI,S,L) values ('000740A0','8','156');
Insert into T (PI,S,L) values ('000740A0','9','102');
Insert into T (PI,S,L) values ('000740A0','10','119');
Insert into T (PI,S,L) values ('000740A0','11','102');
Insert into T (PI,S,L) values ('000740A0','12','99');
Insert into T (PI,S,L) values ('000740A0','13','119');
Insert into T (PI,S,L) values ('000740A0','14','148');
Insert into T (PI,S,L) values ('000740A0','15','102');
Insert into T (PI,S,L) values ('000740A0','16','100');
Insert into T (PI,S,L) values ('000740A0','17','119');
Insert into T (PI,S,L) values ('000740A0','18','100');
Insert into T (PI,S,L) values ('000740A0','19','97');
Insert into T (PI,S,L) values ('000740A0','20','119');
Insert into T (PI,S,L) values ('000740A0','21','100');
Insert into T (PI,S,L) values ('000740A0','22','118');
Insert into T (PI,S,L) values ('000740A0','23','146');
Insert into T (PI,S,L) values ('000740A0','24','120');
Insert into T (PI,S,L) values ('000740A0','25','127');
Insert into T (PI,S,L) values ('000740A0','26','124');
Insert into T (PI,S,L) values ('000740A0','27','134');
Insert into T (PI,S,L) values ('000844A0','1','54');
Insert into T (PI,S,L) values ('000844A0','2','103');
Insert into T (PI,S,L) values ('000844A0','3','102');
Insert into T (PI,S,L) values ('000844A0','4','95');
Insert into T (PI,S,L) values ('000844A0','5','102');
Insert into T (PI,S,L) values ('000844A0','6','99');
Insert into T (PI,S,L) values ('000844A0','7','95');
Insert into T (PI,S,L) values ('000844A0','8','102');
Insert into T (PI,S,L) values ('000844A0','9','133');
Insert into T (PI,S,L) values ('000844A0','10','173');
Insert into T (PI,S,L) values ('000844A0','11','120');
Insert into T (PI,S,L) values ('000844A0','12','143');
Insert into T (PI,S,L) values ('000844A0','13','156');
Insert into T (PI,S,L) values ('000844A0','14','144');
Insert into T (PI,S,L) values ('000844A0','15','139');
Insert into T (PI,S,L) values ('000844A0','16','147');
Insert into T (PI,S,L) values ('000844A0','17','131');
Insert into T (PI,S,L) values ('000844A0','18','132');
Insert into T (PI,S,L) values ('000844A0','19','150');
Insert into T (PI,S,L) values ('000844A0','20','128');
Insert into T (PI,S,L) values ('000844A0','21','120');
Insert into T (PI,S,L) values ('000844A0','22','104');
Insert into T (PI,S,L) values ('000844A0','23','123');
Insert into T (PI,S,L) values ('000844A0','24','104');
Insert into T (PI,S,L) values ('000844A0','25','101');
Insert into T (PI,S,L) values ('000844A0','26','123');
Insert into T (PI,S,L) values ('000844A0','27','104');
Insert into T (PI,S,L) values ('000844A0','28','147');
Insert into T (PI,S,L) values ('000844A0','29','100');
Insert into T (PI,S,L) values ('000844A0','30','150');
Insert into T (PI,S,L) values ('000844A0','31','117');
Insert into T (PI,S,L) values ('000844A0','32','129');
Insert into T (PI,S,L) values ('000844A0','33','114');
Insert into T (PI,S,L) values ('000844A0','34','147');
Insert into T (PI,S,L) values ('000844A0','35','138');
Insert into T (PI,S,L) values ('000844A0','36','147');
Insert into T (PI,S,L) values ('000844A0','37','138');
Insert into T (PI,S,L) values ('000844A0','38','147');
Insert into T (PI,S,L) values ('000844A0','39','138');
Insert into T (PI,S,L) values ('000844A0','40','147');
Insert into T (PI,S,L) values ('000844A0','41','138');
Insert into T (PI,S,L) values ('000844A0','42','147');
Insert into T (PI,S,L) values ('000844A0','43','138');
Insert into T (PI,S,L) values ('000844A0','44','147');
Insert into T (PI,S,L) values ('000844A0','45','138');
Insert into T (PI,S,L) values ('000844A0','46','147');
Insert into T (PI,S,L) values ('000844A0','47','138');
Insert into T (PI,S,L) values ('000844A0','48','147');
Insert into T (PI,S,L) values ('000844A0','49','138');
Insert into T (PI,S,L) values ('000844A0','50','147');
Insert into T (PI,S,L) values ('000844A0','51','138');
Insert into T (PI,S,L) values ('000844A0','52','147');
I would like to create buckets by partitioning by PI and the running sum of L+2 (sorting by S) with the sum being limited to a maximum of 2000. So the query I search for would produce following buckets (in an additional column I), column SU shows the running sum of L+2 (for information only):
PI S L SU I
------------ ---------- ---------- ---------- ----------
000740A0 1 54 56 0
000740A0 2 103 161 0
000740A0 3 102 265 0
000740A0 4 102 369 0
000740A0 5 173 544 0
000740A0 6 174 720 0
000740A0 7 120 842 0
000740A0 8 156 1000 0
000740A0 9 102 1104 0
000740A0 10 119 1225 0
000740A0 11 102 1329 0
000740A0 12 99 1430 0
000740A0 13 119 1551 0
000740A0 14 148 1701 0
000740A0 15 102 1805 0
000740A0 16 100 1907 0
000740A0 17 119 119 1
000740A0 18 100 221 1
000740A0 19 97 320 1
000740A0 20 119 441 1
000740A0 21 100 543 1
000740A0 22 118 663 1
000740A0 23 146 811 1
000740A0 24 120 933 1
000740A0 25 127 1062 1
000740A0 26 124 1188 1
000740A0 27 134 1324 1
000844A0 1 54 56 0
000844A0 2 103 161 0
000844A0 3 102 265 0
000844A0 4 95 362 0
000844A0 5 102 466 0
000844A0 6 99 567 0
000844A0 7 95 664 0
000844A0 8 102 768 0
000844A0 9 133 903 0
000844A0 10 173 1078 0
000844A0 11 120 1200 0
000844A0 12 143 1345 0
000844A0 13 156 1503 0
000844A0 14 144 1649 0
000844A0 15 139 1790 0
000844A0 16 147 1939 0
000844A0 17 131 131 1
000844A0 18 132 265 1
000844A0 19 150 417 1
000844A0 20 128 547 1
000844A0 21 120 669 1
000844A0 22 104 775 1
000844A0 23 123 900 1
000844A0 24 104 1006 1
000844A0 25 101 1109 1
000844A0 26 123 1234 1
000844A0 27 104 1340 1
000844A0 28 147 1489 1
000844A0 29 100 1591 1
000844A0 30 150 1743 1
000844A0 31 117 1862 1
000844A0 32 129 1993 1
000844A0 33 114 114 2
000844A0 34 147 263 2
000844A0 35 138 403 2
000844A0 36 147 552 2
000844A0 37 138 692 2
000844A0 38 147 841 2
000844A0 39 138 981 2
000844A0 40 147 1130 2
000844A0 41 138 1270 2
000844A0 42 147 1419 2
000844A0 43 138 1559 2
000844A0 44 147 1708 2
000844A0 45 138 1848 2
000844A0 46 147 1997 2
000844A0 47 138 138 3
000844A0 48 147 287 3
000844A0 49 138 427 3
000844A0 50 147 576 3
000844A0 51 138 716 3
000844A0 52 147 865 3
Do you see any possibility to solve this without PL/SQL?
The problem is that using an analytic running sum and then using a division and a trunc or a width_bucket works for I=0 but not for the following buckets, because of the remainder. So the following buckets are > 2000...
So far I do it with following PL/SQL:
declare
al integer;
gi integer;
lpi varchar2(200) := null;
begin
for hl in (
select
pi, l, su, i, rowid rid
from t
order by pi, s
for update of su, i
)
loop
if lpi is null or hl.pi<>lpi then
lpi := hl.pi;
gi := 0;
al := 0;
end if;
al := al + hl.l+2;
if al > 2000 then
gi := gi + 1;
al := hl.l+2;
end if;
update t set su=al, i=gi where rowid=hl.rid;
end loop;
end;
But that is slow (2475s).
Best Regards
Martin