Tom -
no luck on getting access to those views. However, I was able to create a small test case for you that (at least on my machine) shows the discrepancy.
Setup

drop table samplenames;
-- Drop objects from any prior run (these DROPs error harmlessly on a clean schema).
drop table customers;
drop table emails;
drop sequence primkey;

-- Pool of ten name fragments; the cross joins below combine them into
-- 10^2 first names x 10^3 last names = 100,000 distinct customers.
create table samplenames
(name VARCHAR2(10));
insert into samplenames values ('dan');
insert into samplenames values ('joe');
insert into samplenames values ('bob');
insert into samplenames values ('sam');
insert into samplenames values ('weslington');
insert into samplenames values ('sue');
insert into samplenames values ('ann');
insert into samplenames values ('mary');
insert into samplenames values ('pam');
insert into samplenames values ('lucy');

-- Surrogate-key generator for CUST_ID.
create sequence primkey
start with 1000000
increment by 1;

-- 100k customers: every two-fragment first name paired with every
-- three-fragment last name.  Explicit CROSS JOIN replaces the original
-- comma join -- the Cartesian product is intentional here.
create table customers as
select primkey.nextval as cust_id,
       tmp1.name || tmp2.name as first_name,
       tmp3.name || tmp4.name || tmp5.name as last_name
from samplenames tmp1
cross join samplenames tmp2
cross join samplenames tmp3
cross join samplenames tmp4
cross join samplenames tmp5;

create table emails
(cust_id NUMBER,
use_type VARCHAR2(2),
add_dttm DATE,
email VARCHAR2(50)
);

-- Two email rows per customer, each stamped with a random date within the
-- last 20 days.  DBMS_RANDOM.VALUE(low, high) returns a value >= low and
-- < high, so low must be the smaller bound: (today - 20, today).  The
-- original call had the bounds reversed.  TO_NUMBER makes the Julian-day
-- string -> number conversion explicit instead of relying on implicit
-- coercion by the subtraction.
insert into emails
select cust_id,
       'IE',
       TO_DATE(TRUNC(DBMS_RANDOM.VALUE(
                   TO_NUMBER(TO_CHAR(trunc(sysdate), 'J')) - 20,
                   TO_NUMBER(TO_CHAR(trunc(sysdate), 'J')))), 'J'),
       substr(first_name,1,1) || last_name || '@fakeemailsrus.com'
from customers;
insert into emails
select cust_id,
       'IE',
       TO_DATE(TRUNC(DBMS_RANDOM.VALUE(
                   TO_NUMBER(TO_CHAR(trunc(sysdate), 'J')) - 20,
                   TO_NUMBER(TO_CHAR(trunc(sysdate), 'J')))), 'J'),
       last_name || substr(first_name,1,1) || '@fakeemailsrus.com'
from customers;
That gives us 100k customers, each with two emails on a random date.
Each test case is against 25k customers.
Test 1

drop table cust_sample;
-- Clean up the working table from any prior run (harmless error on first run).
drop table tmp_emails;
-- Take a 25k-customer sample.  NOTE(review): ROWNUM <= 25000 without an
-- ORDER BY returns an arbitrary, plan-dependent subset -- fine for a
-- benchmark, but the two tests should be run against the same sample.
create table cust_sample as
SELECT cust_id, first_name, last_name
FROM customers
where rownum <= 25000;
-- Column populated by the MERGE below.
alter table cust_sample add (email VARCHAR2(50));
-- Test 1 variant: materialize the candidate emails for the sample into a
-- NOLOGGING work table first, then merge from it.
CREATE TABLE tmp_emails NOLOGGING
AS
SELECT cust_id,
email,
add_dttm
FROM emails
WHERE use_type = 'IE'
AND cust_id IN (SELECT cust_id FROM cust_sample);
-- For each sampled customer, set EMAIL to the most recently added address
-- that passes the regexp filter.  FIRST_VALUE ... ORDER BY add_dttm DESC
-- has no tie-breaker, so ties on ADD_DTTM resolve arbitrarily.
-- NOTE(review): the pattern caps the TLD at 2-3 letters, so addresses
-- like 'x@y.info' are rejected -- confirm that is intended.
MERGE INTO cust_sample tmp
USING (SELECT DISTINCT cust_id,
FIRST_VALUE(email) OVER (PARTITION BY cust_id ORDER BY add_dttm DESC) AS frst_val
FROM tmp_emails
WHERE REGEXP_LIKE (trim(email),
'^[A-Za-z0-9._%+-]+@[A-Za-z0-9._%+-]+\.[A-Za-z]{2,3}$')
) tmp2
ON (tmp.cust_id = tmp2.cust_id)
WHEN MATCHED THEN
UPDATE
SET email = tmp2.frst_val;
Test 1 completes in ~30 seconds in my development environment.
Test 2

drop table cust_sample;
-- Drop the Test 1 work table so both tests start from the same state.
drop table tmp_emails;
-- Same 25k sample as Test 1 (see the ROWNUM determinism caveat there does
-- not apply here: this block stands alone).  NOTE(review): ROWNUM <= 25000
-- without an ORDER BY returns an arbitrary, plan-dependent subset.
create table cust_sample as
SELECT cust_id, first_name, last_name
FROM customers
where rownum <= 25000;
-- Column populated by the MERGE below.
alter table cust_sample add (email VARCHAR2(50));
-- Test 2 variant: identical MERGE logic to Test 1, but the candidate-email
-- set is an inline subquery instead of a pre-materialized work table --
-- this is the only difference being benchmarked.
MERGE INTO cust_sample tmp
USING (SELECT DISTINCT cust_id,
FIRST_VALUE(email) OVER (PARTITION BY cust_id ORDER BY add_dttm DESC) AS frst_val
FROM (SELECT cust_id,
email,
add_dttm
FROM emails
WHERE use_type = 'IE'
AND cust_id IN (SELECT cust_id FROM cust_sample))
WHERE REGEXP_LIKE (trim(email),
'^[A-Za-z0-9._%+-]+@[A-Za-z0-9._%+-]+\.[A-Za-z]{2,3}$')
) tmp2
ON (tmp.cust_id = tmp2.cust_id)
WHEN MATCHED THEN
UPDATE
SET email = tmp2.frst_val;
Test 2 completes in ~60 seconds in my development environment.
I'm aware my timing method is non-scientific, but the difference is large and it is consistent across runs.