Hi.
I'm using Oracle Analytic Views (from 19c) as defined here:
https://docs.oracle.com/en/database/oracle/oracle-database/19/dwhsg/overview-analytic-views.html I can create measures with simple aggregations (eg: SUM, COUNT, MIN), but I can't create a measure that collates COUNT(DISTINCT). I've tried both calculated and basic measures, without success. So,
How do I define a measure that is equivalent to aggregating by COUNT(DISTINCT(x)) using Oracle Analytic Views?
Any help you can provide would be greatly appreciated!
Some stripped-down, example SQL to show my issue is (which I can run, but not share, in livesql.oracle.com):
create table sales_fact (
CUSTOMER_ID VARCHAR2(50 CHAR),
COUNTRY VARCHAR2(50 CHAR),
PRODUCT_ID VARCHAR2(50 CHAR),
ORDER_ID VARCHAR2(50 CHAR),
MONTH_ID VARCHAR2(30 CHAR)
);
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('A', 'France', '1', 'A1', 'Apr-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('A', 'France', '2', 'A1', 'Apr-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('A', 'France', '1', 'A2', 'Jun-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('B', 'England', '2', 'B1', 'Jul-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('C', 'Germany', '2', 'C1', 'Aug-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('D', 'Germany', '3', 'D1', 'Apr-12');
create table time_dim (
MONTH_ID VARCHAR2(30),
MONTH_NAME VARCHAR2(40),
YEAR_ID VARCHAR2(30),
YEAR_NAME VARCHAR2(40)
);
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Apr-11', 'April', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Jun-11', 'June', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Jul-11', 'July', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Aug-11', 'August', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Apr-12', 'April', '12', 'CY2012');
CREATE OR REPLACE ATTRIBUTE DIMENSION time_attr_dim
USING time_dim
ATTRIBUTES
(year_id,
month_id)
LEVEL MONTH
KEY month_id
DETERMINES (year_id)
LEVEL YEAR
KEY year_id;
CREATE OR REPLACE HIERARCHY time_hier
USING time_attr_dim
(month CHILD OF year);
CREATE OR REPLACE ANALYTIC VIEW sales_av
USING sales_fact
DIMENSION BY
(time_attr_dim
KEY month_id REFERENCES month_id
HIERARCHIES (time_hier DEFAULT)
)
MEASURES
(salesCount FACT ORDER_ID AGGREGATE BY COUNT,
orderCount FACT ORDER_ID AGGREGATE BY COUNT -- How to make this count(distinct)?
)
;
-- 2011 should have a salesCount = 5 and orderCount = 4 (if it was distinct)
SELECT YEAR_ID, salesCount, orderCount
FROM sales_av HIERARCHIES(time_hier)
WHERE time_hier.level_name = 'YEAR'
ORDER BY time_hier.hier_order;
Try defining it as COUNT DISTINCT and you'll find:
CREATE OR REPLACE ANALYTIC VIEW sales_av
USING sales_fact
DIMENSION BY
(time_attr_dim
KEY month_id REFERENCES month_id
HIERARCHIES (time_hier DEFAULT)
)
MEASURES
(
salesCount FACT ORDER_ID AGGREGATE BY COUNT,
orderCount FACT ORDER_ID AGGREGATE BY COUNT DISTINCT -- How to make this count(distinct)?
);
ORA-18297: invalid data type detected on base measure "ORDERCOUNT" for use with aggregation operator "COUNT DISTINCT"
Before we resolve that, some background.
In a classic star schema, the various _ID columns are typically integer surrogate keys. These have FK references to dimension tables.
Measures on the fact table are usually numeric values - quantity sold, price, etc. The columns CUSTOMER_ID, PRODUCT_ID, etc. are really dimensions, not measures.
Personally I'd create a fact table like this:
create table sales_fact (
CUSTOMER_ID INTEGER,
COUNTRY_ID INTEGER,
PRODUCT_ID INTEGER,
ORDER_ID INTEGER,
DATE_ID INTEGER,
units_sold integer,
unit_price number
);
With all the *ID columns having FKs to a corresponding dimension table (CUSTOMER_DIM, PRODUCT_DIM, etc.).
So how do you enable COUNT DISTINCT?
Change the measures to be numeric columns!
For simplicity I've changed PRODUCT_ID to be an integer from your demo:
create table sales_fact (
CUSTOMER_ID VARCHAR2(50 CHAR),
COUNTRY VARCHAR2(50 CHAR),
PRODUCT_ID INTEGER,
ORDER_ID VARCHAR2(50 CHAR),
MONTH_ID VARCHAR2(30 CHAR)
);
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('A', 'France', '1', 'A1', 'Apr-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('A', 'France', '2', 'A1', 'Apr-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('A', 'France', '1', 'A2', 'Jun-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('B', 'England', '2', 'B1', 'Jul-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('C', 'Germany', '2', 'C1', 'Aug-11');
insert into sales_fact(CUSTOMER_ID, COUNTRY, PRODUCT_ID, ORDER_ID, MONTH_ID) values ('D', 'Germany', '3', 'D1', 'Apr-12');
create table time_dim (
MONTH_ID VARCHAR2(30),
MONTH_NAME VARCHAR2(40),
YEAR_ID VARCHAR2(30),
YEAR_NAME VARCHAR2(40)
);
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Apr-11', 'April', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Jun-11', 'June', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Jul-11', 'July', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Aug-11', 'August', '11', 'CY2011');
insert into time_dim(MONTH_ID, MONTH_NAME, YEAR_ID, YEAR_NAME) values ('Apr-12', 'April', '12', 'CY2012');
CREATE OR REPLACE ATTRIBUTE DIMENSION time_attr_dim
USING time_dim
ATTRIBUTES
(year_id,
month_id)
LEVEL MONTH
KEY month_id
DETERMINES (year_id)
LEVEL YEAR
KEY year_id;
CREATE OR REPLACE HIERARCHY time_hier
USING time_attr_dim
(month CHILD OF year);
CREATE OR REPLACE ANALYTIC VIEW sales_av
USING sales_fact
DIMENSION BY
(time_attr_dim
KEY month_id REFERENCES month_id
HIERARCHIES (time_hier DEFAULT)
)
MEASURES
(
sales_count FACT PRODUCT_ID AGGREGATE BY COUNT,
product_count FACT PRODUCT_ID AGGREGATE BY COUNT DISTINCT
)
;
SELECT YEAR_ID, sales_count, product_count
FROM sales_av HIERARCHIES(time_hier)
WHERE time_hier.level_name = 'YEAR'
ORDER BY time_hier.hier_order;
YEAR_ID SALES_COUNT PRODUCT_COUNT
11 5 2
12 1 1