Skip to Main Content

Breadcrumb

Question and Answer

Connor McDonald

Thanks for the question, Livio.

Asked: May 13, 2016 - 3:00 pm UTC

Last updated: May 18, 2016 - 4:28 pm UTC

Version: 11.2.0.4.0

Viewed 1000+ times

You Asked

I create and populate the following table:


create table plch_test(
start_date date, end_date date, salary number);

insert into  plch_test
(start_date, end_date, salary)
values
(to_date('01/01/2016 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('04/30/2016 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2015 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('12/31/2015 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2015 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('02/01/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2011 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('01/31/2012 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 13421.7);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2011 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('06/16/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12880.45);
  
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/15/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 13421.7);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('11/01/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2010 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12881.39);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('10/31/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12361.93);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2009 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12361.93);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 12361.93);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2007 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2008 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2007 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('05/01/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('04/30/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11843.41);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2005 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2006 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2005 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('05/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('04/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('04/30/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 11324.9);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('01/01/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('03/31/2004 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 10806.37);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('07/01/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('12/31/2003 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 10806.37);
   
insert into  plch_test
(start_date, end_date, salary)
values
(to_date('04/16/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), to_date('06/30/2002 00:00:00', 'MM/DD/YYYY HH24:MI:SS'), 10806.37);
   
commit;



Ordering the table content by start_date, I would like to retrieve the following result set:



select grp, start_date, end_date, salary
from plch_test
order by start_date desc

GRP  START_DATE    END_DATE      SALARY
1    01/01/2016    30/04/2016    12880.45    
1    01/07/2015    31/12/2015    12880.45    
1    01/07/2012    30/06/2015    12880.45    
1    01/02/2012    30/06/2012    12880.45    
2    01/07/2011    31/01/2012    13421.7    
2    01/07/2010    30/06/2011    12880.45    
2    16/06/2010    30/06/2010    12880.45    
3    01/04/2010    15/06/2010    13421.7    
3    01/11/2009    31/03/2010    12881.39    
3    01/07/2009    31/10/2009    12361.93    
3    01/07/2008    30/06/2009    12361.93    
3    01/04/2008    30/06/2008    12361.93    
3    01/07/2007    31/03/2008    11843.41    
3    01/07/2006    30/06/2007    11843.41    
3    01/05/2006    30/06/2006    11843.41    
3    01/04/2006    30/04/2006    11843.41    
3    01/07/2005    31/03/2006    11324.9    
3    01/07/2004    30/06/2005    11324.9    
3    01/05/2004    30/06/2004    11324.9    
3    01/04/2004    30/04/2004    11324.9    
3    01/01/2004    31/03/2004    10806.37    
3    01/07/2002    31/12/2003    10806.37    
3    16/04/2002    30/06/2002    10806.37    



Briefly, what I want to achieve is assign each row to a different group (bucket) whenever the salary decreases.
I assume I can do this by means of an analytic function.

Could you help me out design column grp?

Thanks in advance
Livio

and Chris said...

It's a shame you're not on 12c. If you were, you could do this easily with pattern matching:

select * from plch_test
match_recognize (
  order by start_date 
  measures match_number() grp
  all rows per match
  pattern ( not_down* )
  define
    not_down as (
      not_down.salary >= prev(not_down.salary) or 
      not_down.start_date = first(not_down.start_date)
    )
)
order by start_date ;

START_DATE                  GRP END_DATE                 SALARY
-------------------- ---------- -------------------- ----------
16-APR-2002 00:00:00          1 30-JUN-2002 00:00:00   10806.37
01-JUL-2002 00:00:00          1 31-DEC-2003 00:00:00   10806.37
01-JAN-2004 00:00:00          1 31-MAR-2004 00:00:00   10806.37
01-APR-2004 00:00:00          1 30-APR-2004 00:00:00    11324.9
01-MAY-2004 00:00:00          1 30-JUN-2004 00:00:00    11324.9
01-JUL-2004 00:00:00          1 30-JUN-2005 00:00:00    11324.9
01-JUL-2005 00:00:00          1 31-MAR-2006 00:00:00    11324.9
01-APR-2006 00:00:00          1 30-APR-2006 00:00:00   11843.41
01-MAY-2006 00:00:00          1 30-JUN-2006 00:00:00   11843.41
01-JUL-2006 00:00:00          1 30-JUN-2007 00:00:00   11843.41
01-JUL-2007 00:00:00          1 31-MAR-2008 00:00:00   11843.41
01-APR-2008 00:00:00          1 30-JUN-2008 00:00:00   12361.93
01-JUL-2008 00:00:00          1 30-JUN-2009 00:00:00   12361.93
01-JUL-2009 00:00:00          1 31-OCT-2009 00:00:00   12361.93
01-NOV-2009 00:00:00          1 31-MAR-2010 00:00:00   12881.39
01-APR-2010 00:00:00          1 15-JUN-2010 00:00:00    13421.7
16-JUN-2010 00:00:00          2 30-JUN-2010 00:00:00   12880.45
01-JUL-2010 00:00:00          2 30-JUN-2011 00:00:00   12880.45
01-JUL-2011 00:00:00          2 31-JAN-2012 00:00:00    13421.7
01-FEB-2012 00:00:00          3 30-JUN-2012 00:00:00   12880.45
01-JUL-2012 00:00:00          3 30-JUN-2015 00:00:00   12880.45
01-JUL-2015 00:00:00          3 31-DEC-2015 00:00:00   12880.45
01-JAN-2016 00:00:00          3 30-APR-2016 00:00:00   12880.45


As you're not, here's one way to do it:

- Use lag() to find the value of the previous previous salary
- If this is greater than the previous row or the first row, return the rownum. Otherwise return null
- Use last_value to return the previous non-null value from this calculation

select start_date, end_date, salary, grp,
       last_value(grp) ignore nulls over (order by start_date) grps
from (
  select start_date, end_date, salary, 
         case
           when lg_sal > salary or lg_sal is null then rownum
         end grp
  from (
    select start_date, end_date, salary, 
           lag(salary) over (order by start_date) lg_sal
    from   plch_test
  )
);

START_DATE           END_DATE                 SALARY        GRP       GRPS
-------------------- -------------------- ---------- ---------- ----------
16-APR-2002 00:00:00 30-JUN-2002 00:00:00   10806.37          1          1
01-JUL-2002 00:00:00 31-DEC-2003 00:00:00   10806.37                     1
01-JAN-2004 00:00:00 31-MAR-2004 00:00:00   10806.37                     1
01-APR-2004 00:00:00 30-APR-2004 00:00:00    11324.9                     1
01-MAY-2004 00:00:00 30-JUN-2004 00:00:00    11324.9                     1
01-JUL-2004 00:00:00 30-JUN-2005 00:00:00    11324.9                     1
01-JUL-2005 00:00:00 31-MAR-2006 00:00:00    11324.9                     1
01-APR-2006 00:00:00 30-APR-2006 00:00:00   11843.41                     1
01-MAY-2006 00:00:00 30-JUN-2006 00:00:00   11843.41                     1
01-JUL-2006 00:00:00 30-JUN-2007 00:00:00   11843.41                     1
01-JUL-2007 00:00:00 31-MAR-2008 00:00:00   11843.41                     1
01-APR-2008 00:00:00 30-JUN-2008 00:00:00   12361.93                     1
01-JUL-2008 00:00:00 30-JUN-2009 00:00:00   12361.93                     1
01-JUL-2009 00:00:00 31-OCT-2009 00:00:00   12361.93                     1
01-NOV-2009 00:00:00 31-MAR-2010 00:00:00   12881.39                     1
01-APR-2010 00:00:00 15-JUN-2010 00:00:00    13421.7                     1
16-JUN-2010 00:00:00 30-JUN-2010 00:00:00   12880.45         17         17
01-JUL-2010 00:00:00 30-JUN-2011 00:00:00   12880.45                    17
01-JUL-2011 00:00:00 31-JAN-2012 00:00:00    13421.7                    17
01-FEB-2012 00:00:00 30-JUN-2012 00:00:00   12880.45         20         20
01-JUL-2012 00:00:00 30-JUN-2015 00:00:00   12880.45                    20
01-JUL-2015 00:00:00 31-DEC-2015 00:00:00   12880.45                    20
01-JAN-2016 00:00:00 30-APR-2016 00:00:00   12880.45                    20


If you want the groups to have no gaps (1, 2, 3, etc.), calculate the dense_rank() of the rows ordered by grps.

Rating

  (7 ratings)

Is this answer out of date? If it is, please let us know via a Comment

Comments

Performance Impact

A reader, May 16, 2016 - 6:51 pm UTC

I don't have 12c so can't benchmark.
But performance wise which one is better patterning matching of 12c OR the analytical functional lag
Chris Saxon
May 17, 2016 - 4:44 pm UTC

They both only access the table once, so there's no difference there. On such a small data set they'll both perform well enough.

I'm not aware of any particular advantage of one over the other for general solutions.

Other options | SQL Modelling

Rajeshwaran, Jeyabal, May 17, 2016 - 7:12 am UTC

rajesh@ORA12C> select *
  2  from plch_test
  3  model
  4    dimension by ( row_number() over(order by start_date) rn)
  5    measures ( start_date, end_date, salary, 0 grp)
  6    rules (
  7      grp[any] order by rn = case when cv(rn) = 1 then 1
  8                  when salary[cv()] >= salary[cv()-1]
  9                  then grp[cv()-1]
 10                  else grp[cv()-1]+1 end )
 11  /

        RN START_DATE  END_DATE        SALARY        GRP
---------- ----------- ----------- ---------- ----------
         1 16-APR-2002 30-JUN-2002   10806.37          1
         2 01-JUL-2002 31-DEC-2003   10806.37          1
         3 01-JAN-2004 31-MAR-2004   10806.37          1
         4 01-APR-2004 30-APR-2004    11324.9          1
         5 01-MAY-2004 30-JUN-2004    11324.9          1
         6 01-JUL-2004 30-JUN-2005    11324.9          1
         7 01-JUL-2005 31-MAR-2006    11324.9          1
         8 01-APR-2006 30-APR-2006   11843.41          1
         9 01-MAY-2006 30-JUN-2006   11843.41          1
        10 01-JUL-2006 30-JUN-2007   11843.41          1
        11 01-JUL-2007 31-MAR-2008   11843.41          1
        12 01-APR-2008 30-JUN-2008   12361.93          1
        13 01-JUL-2008 30-JUN-2009   12361.93          1
        14 01-JUL-2009 31-OCT-2009   12361.93          1
        15 01-NOV-2009 31-MAR-2010   12881.39          1
        16 01-APR-2010 15-JUN-2010    13421.7          1
        17 16-JUN-2010 30-JUN-2010   12880.45          2
        18 01-JUL-2010 30-JUN-2011   12880.45          2
        19 01-JUL-2011 31-JAN-2012    13421.7          2
        20 01-FEB-2012 30-JUN-2012   12880.45          3
        21 01-JUL-2012 30-JUN-2015   12880.45          3
        22 01-JUL-2015 31-DEC-2015   12880.45          3
        23 01-JAN-2016 30-APR-2016   12880.45          3

23 rows selected.

rajesh@ORA12C>

Performance impact

Rajeshwaran, Jeyabal, May 18, 2016 - 7:57 am UTC

With pattern matching you can see reduced CPU utilization compared to Analytic.

select start_date, id end_date, salary, grp,
       last_value(grp) ignore nulls over (order by start_date,id) grps
from (
  select start_date, end_date, salary, id,
         case
           when lg_sal > salary or lg_sal is null then rownum
         end grp
  from (
    select start_date, end_date, salary, id,
           lag(salary) over (order by start_date,id) lg_sal
    from   plch_test
  ) )
order by start_date,id  

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch     6668      3.86       6.31       8479       8537          2     1000000
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total     6670      3.86       6.31       8479       8537          2     1000000

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 111  
Number of plan statistics captured: 1

Rows (1st) Rows (avg) Rows (max)  Row Source Operation
---------- ---------- ----------  ---------------------------------------------------
   1000000    1000000    1000000  WINDOW SORT (cr=8537 pr=8479 pw=3690 time=5170032 us cost=9284 size=39000000 card=1000000)
   1000000    1000000    1000000   VIEW  (cr=8537 pr=4276 pw=0 time=3149176 us cost=9284 size=39000000 card=1000000)
   1000000    1000000    1000000    COUNT  (cr=8537 pr=4276 pw=0 time=2727056 us)
   1000000    1000000    1000000     VIEW  (cr=8537 pr=4276 pw=0 time=2547226 us cost=9284 size=48000000 card=1000000)
   1000000    1000000    1000000      WINDOW SORT (cr=8537 pr=4276 pw=0 time=2275369 us cost=9284 size=18000000 card=1000000)
   1000000    1000000    1000000       TABLE ACCESS FULL PLCH_TEST (cr=8537 pr=4276 pw=0 time=1332536 us cost=1641 size=18000000 card=1000000)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                    6668        0.00          0.00
  direct path read                               98        0.14          0.58
  Disk file operations I/O                        1        0.02          0.02
  direct path write temp                         78        0.16          0.50
  direct path read temp                        1059        0.14          1.37
  SQL*Net message from client                  6668        0.00          4.08
********************************************************************************

select * from plch_test
match_recognize (
  order by start_date,id
  measures match_number() grp
  all rows per match
  pattern ( not_down* )
  define
    not_down as (
      not_down.salary >= prev(not_down.salary) or
      not_down.start_date = first(not_down.start_date)
    )
)
order by start_date,id 

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch     6668      2.93       6.51      18118       8537          7     1000000
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total     6670      2.93       6.51      18118       8537          7     1000000

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 111  
Number of plan statistics captured: 1

Rows (1st) Rows (avg) Rows (max)  Row Source Operation
---------- ---------- ----------  ---------------------------------------------------
   1000000    1000000    1000000  SORT ORDER BY (cr=8537 pr=18118 pw=13842 time=5956937 us cost=27053 size=57000000 card=1000000)
   1000000    1000000    1000000   VIEW  (cr=8537 pr=13168 pw=8892 time=3211421 us cost=11036 size=57000000 card=1000000)
   1000000    1000000    1000000    MATCH RECOGNIZE SORT DETERMINISTIC FINITE AUTOMATON (cr=8537 pr=13168 pw=8892 time=2937258 us cost=11036 size=26000000 card=1000000)
   1000000    1000000    1000000     TABLE ACCESS FULL PLCH_TEST (cr=8537 pr=4276 pw=0 time=1291629 us cost=1641 size=26000000 card=1000000)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                    6668        0.00          0.00
  direct path read                              115        0.01          0.28
  direct path write temp                         99        0.13          1.05
  direct path read temp                        1185        0.36          2.17
  SQL*Net message from client                  6668        0.00          4.05
********************************************************************************

Check it against your data sets.
Connor McDonald
May 18, 2016 - 2:20 pm UTC

You also see more temp activity and nearly identical elapsed times. This is consistent with my findings (match_recognize having less CPU, but more temp):

SQL ID: 5s15dgmvpwxps Plan Hash: 2516121343

SELECT START_DATE, ID END_DATE, SALARY, GRP, LAST_VALUE(GRP) IGNORE NULLS
  OVER (ORDER BY START_DATE,ID) GRPS
FROM
 ( SELECT START_DATE, END_DATE, SALARY, ID, CASE WHEN LG_SAL > SALARY OR
  LG_SAL IS NULL THEN ROWNUM END GRP FROM ( SELECT START_DATE, END_DATE,
  SALARY, ID, LAG(SALARY) OVER (ORDER BY START_DATE,ID) LG_SAL FROM PLCH_TEST
  ) ) ORDER BY START_DATE,ID


call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          4          1           0
Execute      1      0.00       0.00          0          0          0           0
Fetch    10001      3.84       4.03       4186       4432          0     1000000
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total    10003      3.84       4.03       4186       4436          1     1000000

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 121     (recursive depth: 1)
Number of plan statistics captured: 1

Rows (1st) Rows (avg) Rows (max)  Row Source Operation
---------- ---------- ----------  ---------------------------------------------------
   1000000    1000000    1000000  WINDOW SORT (cr=4432 pr=4186 pw=0 time=4028553 us cost=10226 size=46556784 card=969933)
   1000000    1000000    1000000   VIEW  (cr=4432 pr=4186 pw=0 time=3792517 us cost=10226 size=46556784 card=969933)
   1000000    1000000    1000000    COUNT  (cr=4432 pr=4186 pw=0 time=3157672 us)
   1000000    1000000    1000000     VIEW  (cr=4432 pr=4186 pw=0 time=2735936 us cost=10226 size=46556784 card=969933)
   1000000    1000000    1000000      WINDOW SORT (cr=4432 pr=4186 pw=0 time=2182366 us cost=10226 size=33947655 card=969933)
   1000000    1000000    1000000       TABLE ACCESS FULL PLCH_TEST (cr=4432 pr=4186 pw=0 time=250604 us cost=1227 size=33947655 card=969933)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  db file scattered read                        102        0.00          0.15
  db file sequential read                         2        0.00          0.00

****
****

SELECT *
FROM
 PLCH_TEST MATCH_RECOGNIZE ( ORDER BY START_DATE,ID MEASURES MATCH_NUMBER()
  GRP ALL ROWS PER MATCH PATTERN ( NOT_DOWN* ) DEFINE NOT_DOWN AS (
  NOT_DOWN.SALARY >= PREV(NOT_DOWN.SALARY) OR NOT_DOWN.START_DATE =
  FIRST(NOT_DOWN.START_DATE) ) ) ORDER BY START_DATE,ID


call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          1          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch    10001      2.78       6.13       4951       4432          2     1000000
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total    10003      2.78       6.13       4951       4433          2     1000000

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 121     (recursive depth: 1)
Number of plan statistics captured: 1

Rows (1st) Rows (avg) Rows (max)  Row Source Operation
---------- ---------- ----------  ---------------------------------------------------
   1000000    1000000    1000000  SORT ORDER BY (cr=4432 pr=4951 pw=4951 time=10371755 us cost=25452 size=55286181 card=969933)
   1000000    1000000    1000000   VIEW  (cr=4432 pr=0 pw=0 time=7563097 us cost=12057 size=55286181 card=969933)
   1000000    1000000    1000000    MATCH RECOGNIZE SORT DETERMINISTIC FINITE AUTOMATON (cr=4432 pr=0 pw=0 time=7068406 us cost=12057 size=42677052 card=969933)
   1000000    1000000    1000000     TABLE ACCESS FULL PLCH_TEST (cr=4432 pr=0 pw=0 time=257323 us cost=1227 size=42677052 card=969933)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  Disk file operations I/O                        1        0.00          0.00
  direct path write temp                         85        0.18          3.26
  direct path read temp                          87        0.00          0.06


So there isn't a clear cut winner.

Model Odd.

Rajeshwaran, Jeyabal, May 18, 2016 - 8:12 am UTC

Team - any reason why model looks odd? i doesn't perform close to Analytic .

With 1M rows, Analytic done in six second, while model running close to 30 min but doesn't completed(i killed the session). i could see it is spending more time with reading data set from temp (kind of sorting again and again and again) - why so ?

Any help to bring model queries close to analytic ?

select *
  from plch_test
  model
    dimension by ( row_number() over(order by start_date,id) rn)
    measures ( id,start_date, end_date, salary, 0 grp)
    rules (
      grp[any] order by rn = case when cv(rn) = 1 then 1
                  when salary[cv()] >= salary[cv()-1]
                  then grp[cv()-1]
                  else grp[cv()-1]+1 end )
order by start_date,id

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.01       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch        1     62.57    1770.57    2216428       8537          3           0
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        3     62.58    1770.57    2216428       8537          3           0

Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 111  
Number of plan statistics captured: 1

Rows (1st) Rows (avg) Rows (max)  Row Source Operation
---------- ---------- ----------  ---------------------------------------------------
         0          0          0  SORT ORDER BY (cr=0 pr=0 pw=0 time=14 us cost=20431 size=26000000 card=1000000)
         0          0          0   SQL MODEL ORDERED (cr=0 pr=0 pw=0 time=11 us cost=20431 size=26000000 card=1000000)
   1000000    1000000    1000000    WINDOW SORT (cr=8537 pr=8732 pw=4456 time=5710338 us cost=20431 size=26000000 card=1000000)
   1000000    1000000    1000000     TABLE ACCESS FULL PLCH_TEST (cr=8537 pr=4276 pw=0 time=1293208 us cost=1641 size=26000000 card=1000000)


Elapsed times include waiting on following events:
  Event waited on                             Times   Max. Wait  Total Waited
  ----------------------------------------   Waited  ----------  ------------
  SQL*Net message to client                       1        0.00          0.00
  direct path read                              105        0.03          0.36
  direct path write temp                      53192        0.56        275.19
  direct path read temp                       71558        1.46       1394.43


Incase if you need DDL to play with scripts below (big_table is just a copy of all_objects again and again to make it really big)

create table plch_test(id int primary key, start_date date, end_date date, salary number);
insert /*+ append */ into plch_test(id,start_date,end_date,salary)
select id,sysdate + mod(rownum,100), sysdate + dbms_random.value, object_id
from big_table
where id <= 1000000;
commit;


This is not something new, i had similar bad experience with comparing model and analytic.

https://asktom.oracle.com/pls/apex/f?p=100:11:0::::P11_QUESTION_ID:9526697800346367340#9526755900346482419
Chris Saxon
May 18, 2016 - 2:34 pm UTC

That's the nature of model - it tends to use more temp than the other solutions.

Model Odd.

Rajeshwaran, Jeyabal, May 18, 2016 - 2:42 pm UTC

That's the nature of model - it tends to use more temp than the other solutions.

Oh God. i thought model was cool, but now i need to stay away from them.

Any ideas on optimizing model for upcoming versions ?
Chris Saxon
May 18, 2016 - 4:28 pm UTC

I'm not aware of any.

Optimization to SQL Model Clause

Rajeshwaran, Jeyabal, May 20, 2016 - 2:27 pm UTC

Team - Took this to Database ideas forum

https://community.oracle.com/ideas/13061

Please cast your vote to support us.

"If you want the groups to have no gaps (1, 2, 3, etc.)"

Stew Ashton, July 18, 2016 - 1:21 am UTC

Instead of rownum/null + last_value(), you can use 1/0 and sum() to get the same result but without gaps.
select start_date, end_date, salary, grp,
  sum(grp) over(order by start_date) grps
from (
  select start_date, end_date, salary,
    case
      when lag(salary,1,salary+1) over (order by start_date) > salary
      then 1 else 0 end grp
  from plch_test
);

START_DATE END_DATE       SALARY        GRP       GRPS
---------- ---------- ---------- ---------- ----------
2002-04-16 2002-06-30   10806.37          1          1
2002-07-01 2003-12-31   10806.37          0          1
2004-01-01 2004-03-31   10806.37          0          1
2004-04-01 2004-04-30    11324.9          0          1
2004-05-01 2004-06-30    11324.9          0          1
2004-07-01 2005-06-30    11324.9          0          1
2005-07-01 2006-03-31    11324.9          0          1
2006-04-01 2006-04-30   11843.41          0          1
2006-05-01 2006-06-30   11843.41          0          1
2006-07-01 2007-06-30   11843.41          0          1
2007-07-01 2008-03-31   11843.41          0          1
2008-04-01 2008-06-30   12361.93          0          1
2008-07-01 2009-06-30   12361.93          0          1
2009-07-01 2009-10-31   12361.93          0          1
2009-11-01 2010-03-31   12881.39          0          1
2010-04-01 2010-06-15    13421.7          0          1
2010-06-16 2010-06-30   12880.45          1          2
2010-07-01 2011-06-30   12880.45          0          2
2011-07-01 2012-01-31    13421.7          0          2
2012-02-01 2012-06-30   12880.45          1          3
2012-07-01 2015-06-30   12880.45          0          3
2015-07-01 2015-12-31   12880.45          0          3
2016-01-01 2016-04-30   12880.45          0          3

More to Explore

Analytics

Analytic SQL got you confused? Check out Connor McDonald's complete video course.