Just comparing the traditional approach (substr/instr) with JSON approach and thing looks promising with JSON based approach.
demo@ORA12C> set feedback off
demo@ORA12C> drop table test purge;
demo@ORA12C> create table test(x int,id varchar2(3),string varchar2(100));
demo@ORA12C> insert into test(x,id,string) values (1,'Id1','Thing1: Sub1, <br>Thing2: Sub7 ,Sub8 , Sub9 <br>Thing3: Sub12');
demo@ORA12C> insert into test(x,id,string) values (2,'Id1','Thing2: Sub6, Sub7, Sub8, <br>Thing3: Sub12 ,Sub13');
demo@ORA12C> commit;
demo@ORA12C> set feedback 6
demo@ORA12C>
demo@ORA12C> col level1 format a20
demo@ORA12C> col level2 format a20
demo@ORA12C> with json_ized as (
2 select x, id, '[' ||
3 replace(
4 replace(
5 replace(
6 json_array(string),
7 ',',
8 '","'
9 ),
10 ': ',
11 '",["'
12 ),
13 ' <br>',
14 '"]],["'
15 )
16 || ']' json_string
17 from test
18 )
19 select x,id, level1, trim(level2) level2
20 from json_ized,
21 json_table(
22 json_string,
23 '$[*]'
24 columns (
25 level1 path '$[0]',
26 nested path '$[1][*]' columns (
27 level2 path '$'
28 )
29 )
30 )
31 where trim(level2) is not null;
X ID LEVEL1 LEVEL2
---------- --- -------------------- --------------------
1 Id1 Thing1 Sub1
1 Id1 Thing2 Sub7
1 Id1 Thing2 Sub8
1 Id1 Thing2 Sub9
1 Id1 Thing3 Sub12
2 Id1 Thing2 Sub6
2 Id1 Thing2 Sub7
2 Id1 Thing2 Sub8
2 Id1 Thing3 Sub12
2 Id1 Thing3 Sub13
10 rows selected.
demo@ORA12C> select x,id,
2 substr( txt, 1,instr(txt,':')-1) as level1 ,
3 trim( substr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,
4 instr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,',',1,r2)+1 ,
5 instr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,',',1,r2+1)-
6 instr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,',',1,r2)-1) ) as level2
7 from (
8 select x, id, string , r,
9 trim(',' from trim(substr( '<br>'||string||'<br>' , instr( '<br>'||string||'<br>' ,'<br>',1,r)+4,
10 instr( '<br>'||string||'<br>' ,'<br>',1,r+1) -
11 instr( '<br>'||string||'<br>' ,'<br>',1,r) -4) ) ) txt
12 from test, lateral( select level r
13 from dual
14 connect by level <= (length(string) - length(replace(string,'<br>')))/4 +1 )
15 ) t1, lateral( select level r2
16 from dual
17 connect by level <= length(txt) - length( replace(txt,','))+1) ;
X ID LEVEL1 LEVEL2
---------- --- -------------------- --------------------
1 Id1 Thing1 Sub1
1 Id1 Thing2 Sub7
1 Id1 Thing2 Sub8
1 Id1 Thing2 Sub9
1 Id1 Thing3 Sub12
2 Id1 Thing2 Sub6
2 Id1 Thing2 Sub7
2 Id1 Thing2 Sub8
2 Id1 Thing3 Sub12
2 Id1 Thing3 Sub13
10 rows selected.
now bench marking with huge data sets
demo@ORA12C> declare
2 l_cnt int;
3 begin
4 select count(*) into l_cnt
5 from test;
6
7 while ( l_cnt <= 1000000)
8 loop
9 dbms_application_info.set_client_info('l_cnt ='||l_cnt);
10 insert into test(x,id,string)
11 select l_cnt + rownum,id,string
12 from test
13 where rownum <= ( 1000000 - l_cnt);
14 exit when sql%rowcount = 0;
15 l_cnt := l_cnt + sql%rowcount;
16 end loop;
17 commit;
18 dbms_application_info.set_client_info('Stats gather');
19 dbms_stats.gather_table_stats(user,'T');
20 end;
21 /
PL/SQL procedure successfully completed.
demo@ORA12C> @tkfilename
D:\APP\VNAMEIT\VIRTUAL\diag\rdbms\ora12c\ora12c\trace\ora12c_ora_11796.trc
demo@ORA12C> @tktrace
PL/SQL procedure successfully completed.
demo@ORA12C> begin
2 for k in ( with json_ized as (
3 select x, id, '[' ||
4 replace(
5 replace(
6 replace(
7 json_array(string),
8 ',',
9 '","'
10 ),
11 ': ',
12 '",["'
13 ),
14 ' <br>',
15 '"]],["'
16 )
17 || ']' json_string
18 from test
19 )
20 select x,id, level1, trim(level2) level2
21 from json_ized,
22 json_table(
23 json_string,
24 '$[*]'
25 columns (
26 level1 path '$[0]',
27 nested path '$[1][*]' columns (
28 level2 path '$'
29 )
30 )
31 )
32 where trim(level2) is not null)
33 loop
34 null ;
35 end loop;
36 end;
37 /
PL/SQL procedure successfully completed.
demo@ORA12C> begin
2 for k in ( select x,id,
3 substr( txt, 1,instr(txt,':')-1) as level1 ,
4 trim( substr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,
5 instr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,',',1,r2)+1 ,
6 instr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,',',1,r2+1)-
7 instr( ','||trim(substr( txt, instr(txt,':')+1))||',' ,',',1,r2)-1) ) as level2
8 from (
9 select x, id, string , r,
10 trim(',' from trim(substr( '<br>'||string||'<br>' , instr( '<br>'||string||'<br>' ,'<br>',1,r)+
11 instr( '<br>'||string||'<br>' ,'<br>',1,r+1) -
12 instr( '<br>'||string||'<br>' ,'<br>',1,r) -4) ) ) txt
13 from test, lateral( select level r
14 from dual
15 connect by level <= (length(string) - length(replace(string,'<br>')))/4 +1 )
16 ) t1, lateral( select level r2
17 from dual
18 connect by level <= length(txt) - length( replace(txt,','))+1) )
19 loop
20 null ;
21 end loop;
22 end;
23 /
PL/SQL procedure successfully completed.
demo@ORA12C> exit
Disconnected from Oracle Database 12c Enterprise Edition Release 12.2.0.1.0 - 64bit Production
and Tkprof shows this.
WITH JSON_IZED AS ( SELECT X, ID, '[' || REPLACE( REPLACE( REPLACE(
JSON_ARRAY(STRING), ',', '","' ), ': ', '",["' ), ' <br>', '"]],["' ) ||
']' JSON_STRING FROM TEST ) SELECT X,ID, LEVEL1, TRIM(LEVEL2) LEVEL2 FROM
JSON_IZED, JSON_TABLE( JSON_STRING, '$[*]' COLUMNS ( LEVEL1 PATH '$[0]',
NESTED PATH '$[1][*]' COLUMNS ( LEVEL2 PATH '$' ) ) ) WHERE TRIM(LEVEL2) IS
NOT NULL
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 1 0 0
Execute 1 0.00 0.00 0 0 0 0
Fetch 50001 22.88 22.78 0 65927 0 5000000
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 50003 22.88 22.79 0 65928 0 5000000
Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 108 (recursive depth: 1)
Number of plan statistics captured: 1
Rows (1st) Rows (avg) Rows (max) Row Source Operation
---------- ---------- ---------- ---------------------------------------------------
5000000 5000000 5000000 NESTED LOOPS (cr=65927 pr=0 pw=0 time=20561168 us starts=1 cost=2771 size=72 card=1)
1000000 1000000 1000000 TABLE ACCESS FULL TEST (cr=65927 pr=0 pw=0 time=880870 us starts=1 cost=2741 size=68 card=1)
5000000 5000000 5000000 JSONTABLE EVALUATION (cr=0 pr=0 pw=0 time=8799197 us starts=1000000)
SELECT X,ID, SUBSTR( TXT, 1,INSTR(TXT,':')-1) AS LEVEL1 , TRIM( SUBSTR( ',
'||TRIM(SUBSTR( TXT, INSTR(TXT,':')+1))||',' , INSTR( ','||TRIM(SUBSTR( TXT,
INSTR(TXT,':')+1))||',' ,',',1,R2)+1 , INSTR( ','||TRIM(SUBSTR( TXT,
INSTR(TXT,':')+1))||',' ,',',1,R2+1)- INSTR( ','||TRIM(SUBSTR( TXT,
INSTR(TXT,':')+1))||',' ,',',1,R2)-1) ) AS LEVEL2
FROM
( SELECT X, ID, STRING , R, TRIM(',' FROM TRIM(SUBSTR(
'<br>'||STRING||'<br>' , INSTR( '<br>'||STRING||'<br>' ,'<br>',1,R)+4,
INSTR( '<br>'||STRING||'<br>' ,'<br>',1,R+1) - INSTR(
'<br>'||STRING||'<br>' ,'<br>',1,R) -4) ) ) TXT FROM TEST, LATERAL( SELECT
LEVEL R FROM DUAL CONNECT BY LEVEL <= (LENGTH(STRING) -
LENGTH(REPLACE(STRING,'<br>')))/4 +1 ) ) T1, LATERAL( SELECT LEVEL R2 FROM
DUAL CONNECT BY LEVEL <= LENGTH(TXT) - LENGTH( REPLACE(TXT,','))+1)
call count cpu elapsed disk query current rows
------- ------ -------- ---------- ---------- ---------- ---------- ----------
Parse 1 0.00 0.00 0 1 0 0
Execute 1 0.00 0.00 0 0 0 0
Fetch 50001 60.06 59.63 0 59608 0 5000000
------- ------ -------- ---------- ---------- ---------- ---------- ----------
total 50003 60.06 59.63 0 59609 0 5000000
Misses in library cache during parse: 1
Optimizer mode: ALL_ROWS
Parsing user id: 108 (recursive depth: 1)
Number of plan statistics captured: 1
Rows (1st) Rows (avg) Rows (max) Row Source Operation
---------- ---------- ---------- ---------------------------------------------------
5000000 5000000 5000000 NESTED LOOPS (cr=59608 pr=0 pw=0 time=42165332 us starts=1 cost=2745 size=94 card=1)
2499999 2499999 2499999 NESTED LOOPS (cr=59608 pr=0 pw=0 time=7747816 us starts=1 cost=2743 size=81 card=1)
1000000 1000000 1000000 TABLE ACCESS FULL TEST (cr=59608 pr=0 pw=0 time=980984 us starts=1 cost=2741 size=68 card=1)
2499999 2499999 2499999 VIEW VW_LAT_4DB60E85 (cr=0 pr=0 pw=0 time=4367489 us starts=1000000 cost=2 size=13 card=1)
2499999 2499999 2499999 CONNECT BY WITHOUT FILTERING (cr=0 pr=0 pw=0 time=3783943 us starts=1000000)
1000000 1000000 1000000 FAST DUAL (cr=0 pr=0 pw=0 time=150859 us starts=1000000 cost=2 size=0 card=1)
5000000 5000000 5000000 VIEW VW_LAT_4DB60E85 (cr=0 pr=0 pw=0 time=22797732 us starts=2499999 cost=2 size=13 card=1)
5000000 5000000 5000000 CONNECT BY WITHOUT FILTERING (cr=0 pr=0 pw=0 time=21648264 us starts=2499999)
2499999 2499999 2499999 FAST DUAL (cr=0 pr=0 pw=0 time=355530 us starts=2499999 cost=2 size=0 card=1)