I'm not sure Ballantyne's approach does more work, at least for a reasonable data set; my implementation, as you suggested, may be simply inefficient for the small data set.
However, if we generate a larger data set, say, about 10,000 employees, like so:
CREATE TABLE emp2 AS
WITH
--------------------------------------------------------------------------------
-- Parameters (single record)
--------------------------------------------------------------------------------
parm AS (
SELECT 10000 AS NewEmployeeCnt -- start with zero employees, then count
, 3 AS NewSubordinateCnt-- start with zero per manager, then count
, 100 AS MaxOrderCnt -- 0..100 orders per employee
FROM DUAL
),------------------------------------------------------------------------------
-- employees and hierarchy. PK = employeeID
--------------------------------------------------------------------------------
emp_hier ( employeeID, EmployeeName, ReportsTo
, NewSubordinateCnt, NewEmployeeCnt, MAXSubCnt
, OrderCnt, MaxOrderCnt
) AS (
SELECT 1 AS employeeID
, TO_CHAR( to_date(1,'J'),'Jsp') AS EmployeeName
, 1 AS ReportsTo -- selfish at the top
, parm.NewSubordinateCnt
, parm.NewEmployeeCnt
, parm.NewSubordinateCnt AS MAXSubCnt
, ROUND(DBMS_RANDOM.VALUE*MaxOrderCnt,0) AS OrderCnt
, MaxOrderCnt
FROM parm
UNION ALL
SELECT emp_hier.employeeID + 1
, TO_CHAR( to_date(emp_hier.employeeID + 1,'J'),'Jsp') AS EmployeeName
, CASE WHEN NewSubordinateCnt > 1
THEN emp_hier.ReportsTo
ELSE emp_hier.ReportsTo + 1
+ CASE WHEN DBMS_RANDOM.VALUE > 0.2
THEN 0 ELSE 1 -- 20% ragged
END
END AS ReportsTo
, CASE WHEN NewSubordinateCnt > 1
THEN NewSubordinateCnt - 1
ELSE ROUND(DBMS_RANDOM.VALUE*emp_hier.MAXSubCnt,0)
END AS NewSubordinateCnt
, NewEmployeeCnt - 1 AS NewEmployeeCnt
, emp_hier.MAXSubCnt
, ROUND(DBMS_RANDOM.VALUE*MaxOrderCnt,0) AS OrderCnt
, MaxOrderCnt
FROM emp_hier
WHERE NewEmployeeCnt >= 1
)------------------------------------------------------------------------------
-- Pretty-up the results
--------------------------------------------------------------------------------
SELECT employeeID
, SUBSTR(EmployeeName,1,1) -- AS FirstName (initial)
||'. '
||InitCap(SUBSTR(EmployeeName,2,999)) -- AS LastName
AS EmployeeName
, CASE WHEN ReportsTo = employeeID
THEN NULL ELSE ReportsTo
END AS ReportsTo
, OrderCnt
FROM emp_hier
ORDER BY 1
Then a Ballantyne-like approach seems much more reasonable:
WITH -- **************** Ballantyne **********************
----------------------------------------------------------
-- top-down the hierarchy. PK = employeeID
----------------------------------------------------------
cteTree ( employeeID, firstName, lastName, ReportsTo
, by_self, HierPath, "Level", displaysequence
) AS (
SELECT emp2.EmployeeID
, SUBSTR(emp2.employeeName,1,1) AS FirstName
, InitCap(SUBSTR(emp2.employeeName,3,99)) AS LastName
, emp2.ReportsTo
, emp2.by_self
, ',' as HierPath
, 1 as "Level"
, TO_CHAR(1,'fm000000') as displaysequence
FROM emp2
WHERE emp2.ReportsTo IS NULL -- El Presidente
UNION ALL
SELECT emp2.EmployeeID
, SUBSTR(emp2.employeeName,1,1) AS FirstName
, InitCap(SUBSTR(emp2.employeeName,3,99)) AS LastName
, emp2.ReportsTo
, emp2.by_self
, cteTree.HierPath||emp2.ReportsTo||',' AS HierPath
, "Level" +1 as "Level"
, cteTree.displaysequence
||TO_CHAR(ROW_NUMBER()OVER
(ORDER BY InitCap(SUBSTR(emp2.employeeName,3,99))
, SUBSTR(emp2.employeeName,1,1))
,'fm000000') as displaysequence
FROM emp2
JOIN cteTree
ON cteTree.EmployeeID = emp2.ReportsTo
),--------------------------------------------------------
-- Tally table for shredding CSV. PK = node#.
----------------------------------------------------------
tally AS (
SELECT level as node#
FROM ( SELECT MAX(COALESCE(LENGTH(hierpath)
-LENGTH(REPLACE(hierpath,',',''))-1
,0)) max_element_cnt
FROM cteTree )
CONNECT BY LEVEL <= max_element_cnt
),--------------------------------------------------------
-- Sum subordinate orders. PK = ReportsTo
----------------------------------------------------------
whats_up_sub AS (
SELECT SUM(cteTree.BY_SELF) AS BY_SUB
, SUBSTR(hierpath,INSTR(hierpath,',',1,node#)+1
,INSTR(hierpath,',',1,node#+1)-INSTR(hierpath,',',1,node#)-1
) as ReportsTo
FROM tally
JOIN cteTree
ON tally.node# <= COALESCE(LENGTH(cteTree.hierpath)
-LENGTH(REPLACE(cteTree.hierpath,',',''))-1,0)
GROUP
BY SUBSTR(hierpath,INSTR(hierpath,',',1,node#)+1
,INSTR(hierpath,',',1,node#+1)-INSTR(hierpath,',',1,node#)-1
)
)---------------------------------------------------------
-- Results. PK = employeeID
----------------------------------------------------------
SELECT cteTree.employeeID
, LPAD(cteTree.firstName||'.'||cteTree.lastName
,((cteTree."Level"-1)*2)
+length(cteTree.firstName
||'.'||cteTree.lastName)
,' ') as ename
, cteTree."Level"
, cteTree.by_self
, COALESCE(whats_up_sub.by_sub,0) AS by_sub
, cteTree.by_self
+COALESCE(whats_up_sub.by_sub,0) AS total
, count(*)over() as emp_cnt
, max("Level")over() as maxdepth#
FROM cteTree
LEFT OUTER
JOIN whats_up_sub
ON whats_up_sub.ReportsTo = cteTree.employeeID
ORDER BY cteTree.employeeID
FETCH FIRST 15 ROWS ONLY
/
Elapsed: 00:00:02.28
Statistics
----------------------------------------------------
7 recursive calls
8434 db block gets
6778 consistent gets
199 physical reads
1208 redo size
1261 bytes sent via SQL*Net to client
372 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
57 sorts (memory)
0 sorts (disk)
15 rows processed
The "bushy recursion" (start everywhere simultaneously) seems to behave more like a cross join:
WITH --************** Bush *******************************
-- BushOut. PK = employeeID+InitialID
----------------------------------------------------------
BushOut ( InitialID, employeeID, firstName, lastName, ReportsTo, by_self
, HierPath, "Level", depth#
) AS (
SELECT emp2.EmployeeID AS InitialID
, emp2.EmployeeID
, SUBSTR(emp2.employeeName,1,1) AS FirstName
, InitCap(SUBSTR(emp2.employeeName,3,99)) AS LastName
, emp2.ReportsTo
, emp2.by_self
, emp2.ReportsTo||',' as HierPath
, 1 as "Level" -- w.r.t. InitialID
, CASE WHEN emp2.ReportsTo IS NULL
THEN 1
END AS Depth# -- w.r.t. Top Boss
FROM emp2
UNION ALL
SELECT BushOut.InitialID
, emp2.EmployeeID
, SUBSTR(emp2.employeeName,1,1) AS FirstName
, InitCap(SUBSTR(emp2.employeeName,3,99)) AS LastName
, emp2.ReportsTo
, emp2.by_self
, BushOut.HierPath||emp2.ReportsTo||',' AS HierPath
, "Level" +1 as "Level" -- w.r.t. InitialID
, BushOut.Depth# + 1 AS Depth# -- w.r.t. Top Boss
FROM emp2
JOIN BushOut
ON BushOut.EmployeeID = emp2.ReportsTo
), -------------------------------------------------------
-- Smoothing (depth) across nodes
----------------------------------------------------------
Smoothing AS (
SELECT INITIALID, EMPLOYEEID, FIRSTNAME, LASTNAME
, REPORTSTO, BY_SELF
, SUM(CASE WHEN InitialID <> EmployeeID
THEN by_self ELSE 0 END)OVER
(PARTITION BY InitialID) as BY_SUB
, SUM(by_self)OVER
(PARTITION BY InitialID) as TOTAL
, MAX(depth#)OVER(PARTITION BY EmployeeID) AS depth#
FROM BushOut
) -------------------------------------------------------
-- Indent names for final results
----------------------------------------------------------
SELECT EMPLOYEEID
, LPAD(firstName||'.'||lastName
,((depth#-1)*2)
+length(firstName||' '||lastName)
,' ') as ename
, DEPTH# AS "Level", BY_SELF
, BY_SUB, TOTAL
, count(*)over() as emp_cnt, max(Depth#)OVER() Depth#
FROM Smoothing
WHERE initialID = employeeID
ORDER BY employeeID
FETCH FIRST 15 ROWS ONLY
/
Elapsed: 00:00:10.50
Statistics
----------------------------------------------------
6 recursive calls
3321690 db block gets
1120 consistent gets
0 physical reads
0 redo size
1258 bytes sent via SQL*Net to client
372 bytes received via SQL*Net from client
2 SQL*Net roundtrips to/from client
31 sorts (memory)
0 sorts (disk)
15 rows processed
The 'start everywhere' approach resulted in 244K records, a lot to 'minnow down'.
Ballantyne's approach created 10,000 CSV, which needed to be shredded and summed, but it's a much smaller data set.
Did I miss anything?