I create and populate the following table:
create table plch_test(
start_date date, end_date date, salary number);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('01/01/2016 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('04/30/2016 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2015 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('12/31/2015 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2015 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('02/01/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2011 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('01/31/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 13421.7);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2011 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('06/16/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/15/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 13421.7);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('11/01/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12881.39);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('10/31/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12361.93);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12361.93);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12361.93);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2007 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2007 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('05/01/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('04/30/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2005 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2005 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('05/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('04/30/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('01/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 10806.37);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('12/31/2003 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 10806.37);
insert into plch_test
(start_date, end_date, salary)
values
(to_date('04/16/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 10806.37);
commit;
Ordering the table content by start_date, I would like to retrieve the following result set:
select grp, start_date, end_date, salary
from plch_test
order by start_date desc
GRP START_DATE END_DATE SALARY
1 01/01/2016 30/04/2016 12880.45
1 01/07/2015 31/12/2015 12880.45
1 01/07/2012 30/06/2015 12880.45
1 01/02/2012 30/06/2012 12880.45
2 01/07/2011 31/01/2012 13421.7
2 01/07/2010 30/06/2011 12880.45
2 16/06/2010 30/06/2010 12880.45
3 01/04/2010 15/06/2010 13421.7
3 01/11/2009 31/03/2010 12881.39
3 01/07/2009 31/10/2009 12361.93
3 01/07/2008 30/06/2009 12361.93
3 01/04/2008 30/06/2008 12361.93
3 01/07/2007 31/03/2008 11843.41
3 01/07/2006 30/06/2007 11843.41
3 01/05/2006 30/06/2006 11843.41
3 01/04/2006 30/04/2006 11843.41
3 01/07/2005 31/03/2006 11324.9
3 01/07/2004 30/06/2005 11324.9
3 01/05/2004 30/06/2004 11324.9
3 01/04/2004 30/04/2004 11324.9
3 01/01/2004 31/03/2004 10806.37
3 01/07/2002 31/12/2003 10806.37
3 16/04/2002 30/06/2002 10806.37
Briefly, what I want to achieve is assign each row to a different group (bucket) whenever the salary decreases.
I assume I can do this by means of an analytic function.
Could you help me out design column grp?
Thanks in advance
Livio
It's a shame you're not on 12c. If you were, you could do this easily with pattern matching:
select * from plch_test
match_recognize (
order by start_date
measures match_number() grp
all rows per match
pattern ( not_down* )
define
not_down as (
not_down.salary >= prev(not_down.salary) or
not_down.start_date = first(not_down.start_date)
)
)
order by start_date ;
START_DATE GRP END_DATE SALARY
-------------------- ---------- -------------------- ----------
16-APR-2002 00:00:00 1 30-JUN-2002 00:00:00 10806.37
01-JUL-2002 00:00:00 1 31-DEC-2003 00:00:00 10806.37
01-JAN-2004 00:00:00 1 31-MAR-2004 00:00:00 10806.37
01-APR-2004 00:00:00 1 30-APR-2004 00:00:00 11324.9
01-MAY-2004 00:00:00 1 30-JUN-2004 00:00:00 11324.9
01-JUL-2004 00:00:00 1 30-JUN-2005 00:00:00 11324.9
01-JUL-2005 00:00:00 1 31-MAR-2006 00:00:00 11324.9
01-APR-2006 00:00:00 1 30-APR-2006 00:00:00 11843.41
01-MAY-2006 00:00:00 1 30-JUN-2006 00:00:00 11843.41
01-JUL-2006 00:00:00 1 30-JUN-2007 00:00:00 11843.41
01-JUL-2007 00:00:00 1 31-MAR-2008 00:00:00 11843.41
01-APR-2008 00:00:00 1 30-JUN-2008 00:00:00 12361.93
01-JUL-2008 00:00:00 1 30-JUN-2009 00:00:00 12361.93
01-JUL-2009 00:00:00 1 31-OCT-2009 00:00:00 12361.93
01-NOV-2009 00:00:00 1 31-MAR-2010 00:00:00 12881.39
01-APR-2010 00:00:00 1 15-JUN-2010 00:00:00 13421.7
16-JUN-2010 00:00:00 2 30-JUN-2010 00:00:00 12880.45
01-JUL-2010 00:00:00 2 30-JUN-2011 00:00:00 12880.45
01-JUL-2011 00:00:00 2 31-JAN-2012 00:00:00 13421.7
01-FEB-2012 00:00:00 3 30-JUN-2012 00:00:00 12880.45
01-JUL-2012 00:00:00 3 30-JUN-2015 00:00:00 12880.45
01-JUL-2015 00:00:00 3 31-DEC-2015 00:00:00 12880.45
01-JAN-2016 00:00:00 3 30-APR-2016 00:00:00 12880.45
As you're not, here's one way to do it:
- Use lag() to find the value of the previous previous salary
- If this is greater than the previous row or the first row, return the rownum. Otherwise return null
- Use last_value to return the previous non-null value from this calculation
select start_date, end_date, salary, grp,
last_value(grp) ignore nulls over (order by start_date) grps
from (
select start_date, end_date, salary,
case
when lg_sal > salary or lg_sal is null then rownum
end grp
from (
select start_date, end_date, salary,
lag(salary) over (order by start_date) lg_sal
from plch_test
)
);
START_DATE END_DATE SALARY GRP GRPS
-------------------- -------------------- ---------- ---------- ----------
16-APR-2002 00:00:00 30-JUN-2002 00:00:00 10806.37 1 1
01-JUL-2002 00:00:00 31-DEC-2003 00:00:00 10806.37 1
01-JAN-2004 00:00:00 31-MAR-2004 00:00:00 10806.37 1
01-APR-2004 00:00:00 30-APR-2004 00:00:00 11324.9 1
01-MAY-2004 00:00:00 30-JUN-2004 00:00:00 11324.9 1
01-JUL-2004 00:00:00 30-JUN-2005 00:00:00 11324.9 1
01-JUL-2005 00:00:00 31-MAR-2006 00:00:00 11324.9 1
01-APR-2006 00:00:00 30-APR-2006 00:00:00 11843.41 1
01-MAY-2006 00:00:00 30-JUN-2006 00:00:00 11843.41 1
01-JUL-2006 00:00:00 30-JUN-2007 00:00:00 11843.41 1
01-JUL-2007 00:00:00 31-MAR-2008 00:00:00 11843.41 1
01-APR-2008 00:00:00 30-JUN-2008 00:00:00 12361.93 1
01-JUL-2008 00:00:00 30-JUN-2009 00:00:00 12361.93 1
01-JUL-2009 00:00:00 31-OCT-2009 00:00:00 12361.93 1
01-NOV-2009 00:00:00 31-MAR-2010 00:00:00 12881.39 1
01-APR-2010 00:00:00 15-JUN-2010 00:00:00 13421.7 1
16-JUN-2010 00:00:00 30-JUN-2010 00:00:00 12880.45 17 17
01-JUL-2010 00:00:00 30-JUN-2011 00:00:00 12880.45 17
01-JUL-2011 00:00:00 31-JAN-2012 00:00:00 13421.7 17
01-FEB-2012 00:00:00 30-JUN-2012 00:00:00 12880.45 20 20
01-JUL-2012 00:00:00 30-JUN-2015 00:00:00 12880.45 20
01-JUL-2015 00:00:00 31-DEC-2015 00:00:00 12880.45 20
01-JAN-2016 00:00:00 30-APR-2016 00:00:00 12880.45 20
If you want the groups to have no gaps (1, 2, 3, etc.), calculate the dense_rank() of the rows ordered by grps.