-- Test schema: a parent DEPT table and a child EMP table joined on dept_id.
create table test_dept
(dept_id number,
dept_name varchar2(50),
dept_region number);
-- Primary key on dept_id; note the index name PK is what the execution
-- plans below refer to in their INDEX UNIQUE/RANGE SCAN steps.
alter table test_dept
add constraint pk primary key (dept_id);
-- Child table: every employee row references an existing department.
create table test_emp
(emp_id number,
sal number,
dept_id number ,
constraint fk foreign key (dept_id) references test_dept(dept_id));
-- Seed data: four departments spread across three regions
-- (regions 1 and 2 are the ones the test query filters on).
insert into test_dept values (10,'IT', 1);
insert into test_dept values (20,'ADMIN', 1);
insert into test_dept values (30,'SUPPORT', 2);
insert into test_dept values (40,'SERVICES', 3);
COMMIT;
SELECT * FROM test_dept;
-- Sample employees; most belong to departments in regions 1 and 2,
-- so nearly all rows qualify for the query below.
insert into test_emp values (101,1000, 10);
insert into test_emp values (102,6000, 30);
insert into test_emp values (103,8000, 10);
insert into test_emp values (104,5000, 20);
insert into test_emp values (105,1000, 10);
insert into test_emp values (106,6000, 30);
insert into test_emp values (107,8000, 10);
insert into test_emp values (108,5000, 10);
insert into test_emp values (109,1000, 10);
insert into test_emp values (110,6000, 30);
insert into test_emp values (111,8000, 20);
insert into test_emp values (112,5000, 10);
insert into test_emp values (113,1000, 10);
insert into test_emp values (114,6000, 20);
insert into test_emp values (115,8000, 10);
SELECT * FROM test_emp;
-- Problem query: per-department salary range, restricted via a
-- correlated EXISTS to departments located in region 1 or 2.
-- This is the statement the two execution plans below were captured for.
SELECT dept_id, min(sal), max(sal)
from test_emp e
where exists (select 1 from test_dept d where e.dept_id = d.dept_id and dept_region in (1,2))
group by dept_id;
Plan Hash Value : 605905731
----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost | Time |
----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | | | | |
| 1 | SORT GROUP BY | | | | | |
| 2 | NESTED LOOPS | | | | | |
| 3 | NESTED LOOPS | | | | | |
| 4 | TABLE ACCESS FULL | TEST_EMP | | | | |
| * 5 | INDEX UNIQUE SCAN | PK | | | | |
| * 6 | TABLE ACCESS BY INDEX ROWID | TEST_DEPT | | | | |
----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
------------------------------------------
* 5 - access("E"."DEPT_ID"="D"."DEPT_ID")
* 6 - filter("DEPT_REGION"=1 OR "DEPT_REGION"=2)
Now consider that the test_dept has 414 records for the given dept_region and test_emp has 3 million records, in that case the explain plan gives something like
--------------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost | Time |
--------------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 5124 | 97356 | 329559 | 01:16:54 |
| 1 | SORT GROUP BY NOSORT | | 5124 | 97356 | 329559 | 01:16:54 |
| 2 | NESTED LOOPS | | 154913245 | 2943351655 | 321286 | 01:14:59 |
| 3 | SORT UNIQUE | | 414 | 2898 | 35 | 00:00:01 |
| * 4 | TABLE ACCESS FULL | TEST_DEPT | 414 | 2898 | 35 | 00:00:01 |
| 5 | PARTITION RANGE ALL | | 374639 | 4495668 | 1552 | 00:00:22 |
| * 6 | INDEX RANGE SCAN | PK | 374639 | 4495668 | 1552 | 00:00:22 |
--------------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
------------------------------------------
* 4 - filter("D"."DEPT_REGION"=1 OR "D"."DEPT_REGION"=2)
* 6 - access("D"."DEPT_ID"="E"."DEPT_ID")
The execution of the query takes around 12 minutes.
Can you help me reduce this time?
OK, here's my replica (which may differ from your real version), but hopefully it will still help
SQL> create table test_dept
2 (dept_id number,
3 dept_name varchar2(50),
4 dept_region number);
Table created.
SQL>
SQL>
SQL> create table test_emp
2 (emp_id number,
3 sal number,
4 dept_id number);
Table created.
SQL>
SQL> insert into test_dept
2 select rownum*10, 'dept'||(rownum*10), mod(rownum,50)
3 from dual connect by level <= 50*400;
20000 rows created.
SQL>
SQL> insert /*+ APPEND */ into test_emp
2 select rownum, rownum, (1+mod(rownum,18000))*10
3 from ( select 1 from dual connect by level < 1000 ),
4 ( select 1 from dual connect by level < 3000 );
2996001 rows created.
SQL>
SQL> alter table test_dept
2 add constraint pk primary key (dept_id);
Table altered.
SQL>
SQL> alter table test_emp
2 add constraint fk foreign key (dept_id) references test_dept(dept_id);
Table altered.
SQL>
SQL>
SQL> exec dbms_stats.gather_table_stats('','test_emp');
PL/SQL procedure successfully completed.
SQL> exec dbms_stats.gather_table_stats('','test_dept');
PL/SQL procedure successfully completed.
So I have 20,000 departments, 50 regions, and 3m employees. Your query is logically equivalent to:
SQL> SELECT e.dept_id, min(sal), max(sal)
2 from test_emp e,
3 test_Dept d
4 where e.dept_id = d.dept_id
5 and d.dept_region in (1,2)
6 group by e.dept_id;
DEPT_ID MIN(SAL) MAX(SAL)
---------- ---------- ----------
2010 200 2988200
3510 350 2988350
4020 401 2988401
...
720 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 379542677
---------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 18130 | 336K| 2486 (3)| 00:00:01 |
| 1 | HASH GROUP BY | | 18130 | 336K| 2486 (3)| 00:00:01 |
|* 2 | HASH JOIN | | 132K| 2452K| 2480 (2)| 00:00:01 |
|* 3 | TABLE ACCESS FULL| TEST_DEPT | 800 | 6400 | 19 (0)| 00:00:01 |
| 4 | TABLE ACCESS FULL| TEST_EMP | 2996K| 31M| 2447 (2)| 00:00:01 |
---------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
2 - access("E"."DEPT_ID"="D"."DEPT_ID")
3 - filter("D"."DEPT_REGION"=1 OR "D"."DEPT_REGION"=2)
Statistics
----------------------------------------------------------
54 recursive calls
10 db block gets
8920 consistent gets
0 physical reads
0 redo size
21334 bytes sent via SQL*Net to client
1125 bytes received via SQL*Net from client
49 SQL*Net roundtrips to/from client
6 sorts (memory)
0 sorts (disk)
720 rows processed
</code>
So already, 9000 consistent gets in the worst case (let's say all physical multiblock read IO at 10ms) is only a couple of seconds. I could look at indexing the relevant columns to avoid TEST_EMP table access, eg
<code>
SQL> create index test_emp_ix on test_emp ( dept_id, sal );
Index created.
SQL> SELECT e.dept_id, min(sal), max(sal)
2 from test_emp e,
3 test_Dept d
4 where e.dept_id = d.dept_id
5 and d.dept_region in (1,2)
6 group by e.dept_id;
DEPT_ID MIN(SAL) MAX(SAL)
---------- ---------- ----------
2010 200 2988200
3510 350 2988350
4020 401 2988401
...
720 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 496026648
-----------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 18130 | 336K| 1627 (1)| 00:00:01 |
| 1 | HASH GROUP BY | | 18130 | 336K| 1627 (1)| 00:00:01 |
| 2 | NESTED LOOPS | | 132K| 2452K| 1621 (1)| 00:00:01 |
|* 3 | TABLE ACCESS FULL| TEST_DEPT | 800 | 6400 | 19 (0)| 00:00:01 |
|* 4 | INDEX RANGE SCAN | TEST_EMP_IX | 165 | 1815 | 2 (0)| 00:00:01 |
-----------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - filter("D"."DEPT_REGION"=1 OR "D"."DEPT_REGION"=2)
4 - access("E"."DEPT_ID"="D"."DEPT_ID")
Statistics
----------------------------------------------------------
20 recursive calls
0 db block gets
1931 consistent gets
0 physical reads
0 redo size
21334 bytes sent via SQL*Net to client
1125 bytes received via SQL*Net from client
49 SQL*Net roundtrips to/from client
6 sorts (memory)
0 sorts (disk)
720 rows processed
That gets me down to 1900 gets, and notice we never touched the table because we put SAL into the index. (Obviously you need to weigh that up with the rest of your application).
Interestingly, one of your execution plans referenced a partitioning scheme, so perhaps you didn't tell me everything, but let's continue along that vein - we could look at reference partitioning, eg
<code>
SQL> create table test_dept
2 (dept_id number primary key,
3 dept_name varchar2(50),
4 dept_region number)
5 partition by hash ( dept_region )
6 partitions 16;
Table created.
SQL>
SQL>
SQL> create table test_emp
2 (emp_id number,
3 sal number,
4 dept_id number not null,
5 constraint fk foreign key (dept_id) references test_dept(dept_id) on delete cascade)
6 partition by reference(fk);
Table created.
SQL>
SQL> insert into test_dept
2 select rownum*10, 'dept'||(rownum*10), mod(rownum,50)
3 from dual connect by level <= 50*400;
20000 rows created.
SQL>
SQL> insert /*+ APPEND */ into test_emp
2 select rownum, rownum, (1+mod(rownum,18000))*10
3 from ( select 1 from dual connect by level < 1000 ),
4 ( select 1 from dual connect by level < 3000 );
2996001 rows created.
SQL>
SQL>
SQL> exec dbms_stats.gather_table_stats('','test_emp');
PL/SQL procedure successfully completed.
SQL> exec dbms_stats.gather_table_stats('','test_dept');
PL/SQL procedure successfully completed.
SQL> SELECT e.dept_id, min(sal), max(sal)
2 from test_emp e,
3 test_Dept d
4 where e.dept_id = d.dept_id
5 and d.dept_region in (1,2)
6 group by e.dept_id;
DEPT_ID MIN(SAL) MAX(SAL)
---------- ---------- ----------
2010 200 2988200
3510 350 2988350
4020 401 2988401
...
720 rows selected.
Execution Plan
----------------------------------------------------------
Plan hash value: 2769476757
----------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | Pstart| Pstop |
----------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 18130 | 336K| 3057 (2)| 00:00:01 | | |
| 1 | HASH GROUP BY | | 18130 | 336K| 3057 (2)| 00:00:01 | | |
| 2 | PARTITION HASH INLIST| | 132K| 2452K| 3051 (2)| 00:00:01 |KEY(I) |KEY(I) |
|* 3 | HASH JOIN | | 132K| 2452K| 3051 (2)| 00:00:01 | | |
|* 4 | TABLE ACCESS FULL | TEST_DEPT | 800 | 6400 | 548 (1)| 00:00:01 |KEY(I) |KEY(I) |
| 5 | TABLE ACCESS FULL | TEST_EMP | 2996K| 31M| 2489 (2)| 00:00:01 |KEY(I) |KEY(I) |
----------------------------------------------------------------------------------------------------
Predicate Information (identified by operation id):
---------------------------------------------------
3 - access("E"."DEPT_ID"="D"."DEPT_ID")
4 - filter("D"."DEPT_REGION"=1 OR "D"."DEPT_REGION"=2)
Statistics
----------------------------------------------------------
15 recursive calls
5 db block gets
1719 consistent gets
0 physical reads
132 redo size
21334 bytes sent via SQL*Net to client
1125 bytes received via SQL*Net from client
49 SQL*Net roundtrips to/from client
2 sorts (memory)
0 sorts (disk)
720 rows processed
</code>
Now I'm down at 1700 gets without any additional indexes. Once again, obviously you need to weigh that up with the rest of your application.
So plenty of options to explore.