RE: SQL problem
Date: Sun, 11 Jan 2015 12:26:36 +0100
Message-ID: <029701d02d91$75cd96e0$6168c4a0$_at_db-nemec.com>
Hello Thomas,
> As most of the period for k1 do not overlap
One obvious optimization is to calculate the degree of overlapping of the subintervals and highest priority per K1. (Note that subintervals are defined by considering all start and stop day for a specific key in the same way as in your solution). The subsequent (costly) join can be omitted for subintervals with:
a) Degree of overlapping = 1 or b) The priority of the subinterval equals the highest priority per keyMore precise formulation of b) is – the record chosen to represent the subinterval (from other parallel intervals) has the top priority for given K1.
For your sample data 3 rows could be eliminated from join (marked as FINAL –
SQL see below).
One due to non-overlapping (PDEG_ACC = 1); two due to top priority.
(PRIORITY = MIN_PRIO)
SRC, K1, START_DATE, END_DATE, PRIORITY, ENS, PDEG_ACC, MIN_PRIO
JOIN 1 01.01.14 31.03.14 7 456 2 7 JOIN 1 01.04.14 30.04.14 4 987 3 4 JOIN 1 02.06.14 01.07.14 1 212 3 1 JOIN 1 02.07.14 31.07.14 4 987 2 4 JOIN 1 01.08.14 01.09.14 4 987 3 4 JOIN 1 02.09.14 01.01.15 7 456 2 7 JOIN 1 02.01.15 30.12.99 9 123 1 9 JOIN 2 02.09.14 30.12.99 1 212 1 1 FINAL 1 01.05.14 01.06.14 1 212 4 1 FINAL 2 01.05.14 01.09.14 1 212 2 1 FINAL 2 01.01.14 30.04.14 4 987 1 1
This solution is very effective for non-overlapping data.
> The problem I see with this solution is if my data pattern change > completely and period do overlap most of the time
In the worst case the performance will degrade to your original solution, as only few records will be skipped from the join. In a lucky case (i.e. the start dates of the overlapping intervals are the same) you may still profit from the top priority reduction. See the handling of PRIORITY in the query below.
Here the query with few comments to point the details
INSERT
INTO etrn_tst_result
with set_pdeg as (
-- get time grid
- split in start and stop records
select
k1,
1 pdeg,
start_date trans_date,
priority,
ens
from etrn_tst
union all
select
k1,
-1 pdeg,
end_date + case when end_date != TO_DATE('31/12/9999', 'DD/MM/YYYY') then 1 else 0 end trans_date, NULL priority, NULL ens from etrn_tst ), acc_pdeg as ( - accumulate par. degree for per K1 select K1, pdeg, TRANS_DATE, PRIORITY,ENS,
- on the same date first close, than open (to keep degree lower); consider
the top rpirity as last to give the chance to winn the subinterval
sum(pdeg) over (partition by k1 order by TRANS_DATE, pdeg, priority desc
ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) pdeg_acc,
lead(TRANS_DATE) over (PARTITION BY k1 order by TRANS_DATE, pdeg, priority
desc ) TRANS_DATE_LEAD
from set_pdeg
)
, from_to as (
select K1, pdeg, TRANS_DATE START_DATE, TRANS_DATE_LEAD -1 END_DATE,
PRIORITY,ENS, pdeg_acc,
min(priority) over (partition by k1) min_prio
from acc_pdeg
where TRANS_DATE < TRANS_DATE_LEAD
)
--select * from from_to order by 1,3;
, final_part as ( - final part (without join) as there is only one parallel record or the priority is the highest one.
- Note: consider only opening records (pdeg = 1) as in the closing records the ens and prio are not valid
- rest of the records will be joined later
select * from from_to
where pdeg = 1 and (pdeg_acc = 1 or priority = min_prio)
)
--select * from final_part;
, need_join as ( - join with original table to get the highest prio record
select x.K1, x.START_DATE, x.END_DATE, y.PRIORITY, y.ENS,
min(y.priority) over (partition by x.K1, x.START_DATE) min_prio
from from_to x, etrn_tst y
where not (x.pdeg = 1 and (x.pdeg_acc = 1 or x.priority = x.min_prio)) and
x.k1 = y.k1 and
x.start_date between y.start_date and y.end_date
),
union_all as (
select 'JOIN' src, K1, START_DATE, END_DATE, PRIORITY, ENS
from need_join
where priority = min_prio
UNION ALL
select 'FINAL' src, K1, START_DATE, END_DATE, PRIORITY, ENS
from final_part)
--select * from union_all;
, merge1 as ( - merge adjecent records with the same value of ens
- same logic as in the original solution
select
k1 ,
start_date ,
end_date ,
ens ,
DECODE(DECODE(LAG(ens, 1) OVER(PARTITION BY k1 ORDER BY start_date), ens, 0,
1) + DECODE(LAG(end_date, 1) OVER(PARTITION BY k1 ORDER BY start_date),
start_date - 1, 0, 1), 0, 1, 0) AS ind_same
from union_all
),
merge2 as (
select
k1 ,
start_date ,
end_date ,
ens ,
ind_same,
RANK() OVER(PARTITION BY k1 ORDER BY start_date ASC) - SUM(ind_same)
OVER(PARTITION BY k1 ORDER BY start_date ASC ROWS BETWEEN UNBOUNDED
PRECEDING AND CURRENT ROW) AS grp
from merge1 )
--select * from merge2;
select k1 , min(start_date) start_date, max(end_date) end_date, ens from merge2 group by k1 ,ens, grp order by k1, START_DATE;
HTH mit freundlichen Grüßen,
Jaromir D.B. Nemec
http://www.db-nemec.com
--
http://www.freelists.org/webpage/oracle-l
Received on Sun Jan 11 2015 - 12:26:36 CET