Need to improve query performance (Oracle 10g) [message #349657] Mon, 22 September 2008 09:47
RAJANKK4
Messages: 4
Registered: September 2008
Junior Member
Hi,

I need to improve the performance of a query. Currently the query fails because of the huge amount of data in the tables. Oracle gurus, please help.

SELECT TL_PAYABLE_TIME.EMPLID
, TL_PAYABLE_TIME.EMPL_RCD
, TL_PAYABLE_TIME.DUR
, TL_PAYABLE_TIME.SEQ_NBR
, PERSONAL_DATA.NAME
, JOB.BARG_UNIT
, TL_PAYABLE_TIME.TRC
, TL_TRC.DESCR AS OH_TRC_DESCR
, TL_ERNCD.TL_ERNCD
, TL_PAYABLE_TIME.TL_QUANTITY
, SCH_ASSIGN.SCHEDULE_ID
, SCH_CLND.SCHED_HRS
, TL_PAYABLE_TIME.PAYABLE_STATUS
, TL_PAYABLE_TIME.PAYROLL_REQ_NUM
, TL_PAYABLE_TIME.FROZEN_SW
, TL_PAYABLE_TIME.FROZEN_DATE
, TL_PAYABLE_TIME.PUBLISH_SW
, TL_PAYABLE_TIME.ACTUAL_PUB_DATE
, TL_PAYABLE_TIME.APPRV_PRCS_DTTM
, TL_PAYABLE_TIME.OPRID
, TL_PAYABLE_TIME.ORIG_SEQ_NBR
, TL_PAYABLE_TIME.OVERRIDE_RATE
, TL_PAYABLE_TIME.RECORD_ONLY_ADJ
, TL_PAYABLE_TIME.LBR_DIST_AMT
, TL_PAYABLE_TIME.EST_GROSS
, TL_PAYABLE_TIME.DILUTED_GROSS
, TL_PAYABLE_TIME.SOURCE_IND
, TL_PAYABLE_TIME.TASKGROUP
, TL_PAYABLE_TIME.BILLABLE_IND
, TL_PAYABLE_TIME.ACCT_CD
, TL_PAYABLE_TIME.ACCOUNT
, TL_PAYABLE_TIME.DEPTID_CF AS OH_CF_DEPTID
, TL_PAYABLE_TIME.PROJECT_ID
, TL_PAYABLE_TIME.FUND_CODE
, TL_PAYABLE_TIME.PROGRAM_CODE
, TL_PAYABLE_TIME.CLASS_FLD
, TL_PAYABLE_TIME.CHARTFIELD1
, TL_PAYABLE_TIME.CHARTFIELD2
, TL_PAYABLE_TIME.CHARTFIELD3
, TL_PAYABLE_TIME.BUDGET_REF
, JOB.COMPANY
, TL_PAYABLE_TIME.BUSINESS_UNIT
, TL_PAYABLE_TIME.PRODUCT
, TL_PAYABLE_TIME.CUSTOMER
, TL_PAYABLE_TIME.BUSINESS_UNIT_PC
, TL_PAYABLE_TIME.BUSINESS_UNIT_PF
, TL_PAYABLE_TIME.SETID_ACTIVITY
, TL_PAYABLE_TIME.ACTIVITY_ID
, TL_PAYABLE_TIME.RESOURCE_TYPE
, TL_PAYABLE_TIME.SETID_RESOURCE
, TL_PAYABLE_TIME.RESOURCE_CATEGORY
, TL_PAYABLE_TIME.RESOURCE_SUB_CAT
, TL_PAYABLE_TIME.TASK
, TL_PAYABLE_TIME.USER_FIELD_1
, TL_PAYABLE_TIME.USER_FIELD_2
, TL_PAYABLE_TIME.USER_FIELD_3
, TL_PAYABLE_TIME.USER_FIELD_4
, TL_PAYABLE_TIME.USER_FIELD_5
, PERS_NID.NATIONAL_ID
, JOB.SETID_DEPT AS SETID
, JOB.DEPTID
, DEPT.DESCR AS OH_DEPT_DESCR
, JOB.POSITION_NBR
, POSITION_DATA.DESCR AS OH_POSITION_DESCR
, JOB.JOBCODE
, JOBCODE.DESCR AS OH_JOBCODE_DESCR
, JOB.LOCATION
, LOCATION.DESCR AS OH_LOCATION_DESCR
, JOB.UNION_CD
, UNION_TBL.DESCR AS OH_UNION_DESCR
FROM PS_TL_PAYABLE_TIME TL_PAYABLE_TIME
LEFT OUTER JOIN PS_OH_S_TL_ERNCD TL_ERNCD ON TL_ERNCD.TRC = TL_PAYABLE_TIME.TRC
  AND TL_ERNCD.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM
LEFT OUTER JOIN PS_OH_S_SCH_ASSIGN SCH_ASSIGN ON SCH_ASSIGN.EMPLID = TL_PAYABLE_TIME.EMPLID
  AND SCH_ASSIGN.EMPL_RCD = TL_PAYABLE_TIME.EMPL_RCD
LEFT OUTER JOIN PS_OH_S_SCH_CLD_DL SCH_CLND ON SCH_CLND.SETID = SCH_ASSIGN.SETID
  AND SCH_CLND.OH_SCH_ADHOC_IND = SCH_ASSIGN.OH_SCH_ADHOC_IND
  AND SCH_CLND.SCHEDULE_ID = SCH_ASSIGN.SCHEDULE_ID
  AND SCH_CLND.OH_ROTATION_ID = SCH_ASSIGN.OH_ROTATION_ID
  AND SCH_CLND.DUR = TL_PAYABLE_TIME.DUR
, PS_JOB JOB
LEFT OUTER JOIN PS_S_POSITION_DATA POSITION_DATA ON POSITION_DATA.POSITION_NBR = JOB.POSITION_NBR
LEFT OUTER JOIN PS_DEPT_TBL DEPT ON DEPT.DEPTID = JOB.DEPTID
  AND DEPT.SETID = JOB.SETID_DEPT
LEFT OUTER JOIN PS_S_UNION_TBL UNION_TBL ON UNION_TBL.UNION_CD = JOB.UNION_CD
, PS_PERSONAL_DATA PERSONAL_DATA
, PS_PERS_NID PERS_NID
, PS_TL_TRC_TBL TL_TRC
, PS_LOCATION_TBL LOCATION
, PS_JOBCODE_TBL JOBCODE
WHERE TL_PAYABLE_TIME.EMPLID = PERSONAL_DATA.EMPLID
AND TL_PAYABLE_TIME.EMPLID = PERS_NID.EMPLID
AND PERS_NID.COUNTRY = 'USA'
AND PERS_NID.NATIONAL_ID_TYPE = 'PR'
AND TL_PAYABLE_TIME.EMPLID = JOB.EMPLID
AND TL_PAYABLE_TIME.EMPL_RCD = JOB.EMPL_RCD
AND LOCATION.SETID = JOB.SETID_LOCATION
AND LOCATION.LOCATION = JOB.LOCATION
AND JOBCODE.JOBCODE = JOB.JOBCODE
AND JOBCODE.SETID = JOB.SETID_JOBCODE
AND TL_TRC.TRC = TL_PAYABLE_TIME.TRC
AND TL_TRC.EFFDT = (
SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC.TRC = TL_TRC_ED.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
AND ( TL_ERNCD.EFFDT = (
SELECT MAX(TL_ERNCD_ED.EFFDT)
FROM PS_OH_S_TL_ERNCD TL_ERNCD_ED
WHERE TL_ERNCD.TRC = TL_ERNCD_ED.TRC
AND TL_ERNCD.PAY_SYSTEM = TL_ERNCD_ED.PAY_SYSTEM
AND TL_ERNCD_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
OR TL_ERNCD.EFFDT IS NULL )
AND ( SCH_ASSIGN.EFFDT = (
SELECT MAX(SCH_ASSIGN_ED.EFFDT)
FROM PS_OH_S_SCH_ASSIGN SCH_ASSIGN_ED
WHERE SCH_ASSIGN.EMPLID = SCH_ASSIGN_ED.EMPLID
AND SCH_ASSIGN.EMPL_RCD = SCH_ASSIGN_ED.EMPL_RCD
AND SCH_ASSIGN_ED.EFFDT <= TL_PAYABLE_TIME.DUR
AND SCH_ASSIGN_ED.END_EFFDT >= TL_PAYABLE_TIME.DUR )
OR SCH_ASSIGN.EFFDT IS NULL )
AND ( SCH_CLND.EFFDT = (
SELECT MAX(SCH_CLND_ED.EFFDT)
FROM PS_OH_S_SCH_CLD_DL SCH_CLND_ED
WHERE SCH_CLND.SETID = SCH_CLND_ED.SETID
AND SCH_CLND.OH_SCH_ADHOC_IND = SCH_CLND_ED.OH_SCH_ADHOC_IND
AND SCH_CLND.SCHEDULE_ID = SCH_CLND_ED.SCHEDULE_ID
AND SCH_CLND.OH_ROTATION_ID = SCH_CLND_ED.OH_ROTATION_ID
AND SCH_CLND.DUR = SCH_CLND_ED.DUR
AND SCH_CLND.OH_SHIFTNUM = SCH_CLND_ED.OH_SHIFTNUM
AND SCH_CLND_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
OR SCH_CLND.EFFDT IS NULL )
AND JOB.EFFDT = (
SELECT MAX(JOB_ED.EFFDT)
FROM PS_JOB JOB_ED
WHERE JOB.EMPLID = JOB_ED.EMPLID
AND JOB.EMPL_RCD = JOB_ED.EMPL_RCD
AND JOB_ED.EFFDT <= TL_PAYABLE_TIME.DUR )
AND JOB.EFFSEQ = (
SELECT MAX(JOB_ES.EFFSEQ)
FROM PS_JOB JOB_ES
WHERE JOB.EMPLID = JOB_ES.EMPLID
AND JOB.EMPL_RCD = JOB_ES.EMPL_RCD
AND JOB.EFFDT = JOB_ES.EFFDT )
AND LOCATION.EFFDT = (
SELECT MAX(LOCATION_ED.EFFDT)
FROM PS_LOCATION_TBL LOCATION_ED
WHERE LOCATION.SETID = LOCATION_ED.SETID
AND LOCATION.LOCATION = LOCATION_ED.LOCATION
AND LOCATION_ED.EFFDT <= JOB.EFFDT )
AND JOBCODE.EFFDT = (
SELECT MAX(JOBCODE_ED.EFFDT)
FROM PS_JOBCODE_TBL JOBCODE_ED
WHERE JOBCODE.SETID = JOBCODE_ED.SETID
AND JOBCODE.JOBCODE = JOBCODE_ED.JOBCODE
AND JOBCODE_ED.EFFDT <= JOB.EFFDT )
AND ( POSITION_DATA.EFFDT = (
SELECT MAX(POSITION_DATA_ED.EFFDT)
FROM PS_S_POSITION_DATA POSITION_DATA_ED
WHERE POSITION_DATA.POSITION_NBR = POSITION_DATA_ED.POSITION_NBR
AND POSITION_DATA_ED.EFFDT <= JOB.EFFDT )
OR POSITION_DATA.EFFDT IS NULL )
AND ( DEPT.EFFDT = (
SELECT MAX(DEPT_ED.EFFDT)
FROM PS_DEPT_TBL DEPT_ED
WHERE DEPT.DEPTID = DEPT_ED.DEPTID
AND DEPT.SETID = DEPT_ED.SETID
AND DEPT_ED.EFFDT <= JOB.EFFDT )
OR DEPT.EFFDT IS NULL )
AND ( UNION_TBL.EFFDT = (
SELECT MAX(UNION_TBL_ED.EFFDT)
FROM PS_S_UNION_TBL UNION_TBL_ED
WHERE UNION_TBL.UNION_CD = UNION_TBL_ED.UNION_CD
AND UNION_TBL_ED.EFFDT <= JOB.EFFDT )
OR UNION_TBL.EFFDT IS NULL )
AND TL_PAYABLE_TIME.DUR >= TO_DATE('2008-01-01' , 'YYYY-MM-DD')
Re: Need to improve query performance [message #349668 is a reply to message #349657] Mon, 22 September 2008 10:33
Michel Cadot
Messages: 68716
Registered: March 2007
Location: Saint-Maur, France, https...
Senior Member
Account Moderator
Please read the OraFAQ Forum Guide, especially the "How to format your post?" section.
Make sure that lines of code do not exceed 80 characters when you format.
Indent the code (see SQL Formatter).
Use the "Preview Message" button to verify.
Also, always post your Oracle version (with 4 decimals).
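For reference, one quick way to get the full version string is to query the database itself (a generic example, assuming SQL*Plus or a similar client):

SELECT banner FROM v$version;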

You can first read:
- OraFAQ Forum Guide, Performance Tuning section
- How to Identify Performance Problem and Bottleneck
- OraFAQ Performance Tuning
- Rosco's SQL Tuning Guide

Then provide the requested and usual information.

Regards
Michel
Re: Need to improve query performance [message #349669 is a reply to message #349657] Mon, 22 September 2008 10:36
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Supash,
1) Post the execution plan (see the EXPLAIN PLAN sketch at the end of this post).
2) Post the tables' row counts.
3) Do something with your query so that it is readable; use code tags to preserve formatting in your post.
4) Write down the execution time.
5) Tell us which Oracle version you are using, because in Oracle, subselects in the WHERE clause such as
 AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT) 
                           FROM PS_TL_TRC_TBL TL_TRC_ED 
                           WHERE TL_TRC.TRC = TL_TRC_ED.TRC 
                                 AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR ) 

that refer to two tables are very slow; MS SQL Server deals with them in no time.

Maybe you can write a smaller select that contains the subselect I mentioned, so that we can tune that small select first and know what to do next.
At the moment, tuning that large select without knowing the tables and the logic is really hard.

I have a few ideas on how to optimize that query, but you need to do all the things I mentioned.
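For item 1, a minimal SQL*Plus sketch for capturing the plan and the elapsed time (a generic example, not tied to your tables; put your own statement in place of the placeholder):

SET TIMING ON

EXPLAIN PLAN FOR
<your SELECT statement>;

SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);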

[Updated on: Mon, 22 September 2008 11:18]


Re: Need to improve query performance [message #349690 is a reply to message #349669] Mon, 22 September 2008 12:51
RAJANKK4
Messages: 4
Registered: September 2008
Junior Member
Hi Kriptas,

I have put the execution plan, along with the execution time, in the attached file so that it is readable. The row counts are as follows:

PS_OH_S_SCH_CLD_DL: 277,380
PS_DEPT_TBL: 16,907
PS_LOCATION_TBL: 13,909
PS_JOBCODE_TBL: 11,171
PS_JOB: 650,347
PS_OH_S_TL_ERNCD: 234
PS_OH_S_SCH_ASSIGN: 24,600
PS_TL_PAYABLE_TIME: 40,730,349
PS_PERS_NID: 101,866
PS_PERSONAL_DATA: 101,854
PS_S_UNION_TBL: 22
PS_TL_TRC_TBL: 268
PS_S_POSITION_DATA: 177,736

I use Oracle version 10.2.0.

Re: Need to improve query performance [message #349692 is a reply to message #349657] Mon, 22 September 2008 13:01
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Can you write a shorter version of the query containing the subselect I mentioned,
and rewrite the whole query in ANSI SQL (with INNER JOIN and LEFT OUTER JOIN)?
Re: Need to improve query performance [message #349734 is a reply to message #349657] Mon, 22 September 2008 21:13
harrysmall3
Messages: 109
Registered: April 2008
Location: Massachusetts
Senior Member
Count me in for the challenge as well. I just finished a 10g install on my local machine, and now I have a purpose for my first schema.

I agree we should establish a simplified query which contains the minimal fields to represent the relation. Kriptas already has an approach angle to start - please take his advice and post the results of the execution along with its corresponding plan.

I downloaded the plan you provided and glanced at it briefly.
As a start, I am going to approach this with the goal of reducing the number of records returned to the sort steps of each hash join while maintaining the general access methods
(I'm an avid hash join fan for large-volume joins).

Supash, can you also provide a list of the table indexes and
their clustering factors? Is it also possible for you to gather full statistics with dbms_stats on the underlying tables,
with histograms for skewed columns? (A sketch of both is below.)

If not, please show the sample size used for the last stats
gathering. Also, list any constraints on the tables.
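For reference, a hedged sketch of how that information could be pulled from the data dictionary and how the statistics could be gathered (run as the owning schema; the table name is just one from your post, everything else is generic):

-- Indexes and their clustering factors
SELECT index_name, clustering_factor, num_rows, last_analyzed
FROM   user_indexes
WHERE  table_name = 'PS_TL_PAYABLE_TIME';

-- Constraints on the table
SELECT constraint_name, constraint_type
FROM   user_constraints
WHERE  table_name = 'PS_TL_PAYABLE_TIME';

-- Full statistics, with histograms on skewed columns
BEGIN
  dbms_stats.gather_table_stats(ownname    => USER
                               ,tabname    => 'PS_TL_PAYABLE_TIME'
                               ,method_opt => 'FOR ALL COLUMNS SIZE AUTO'
                               ,cascade    => TRUE);
END;
/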

My reasons for the above are two-fold. One, I want to replicate
the environment as closely as possible; two, I want to
know what level of "trust" I can have in Oracle based on the
statistics it is working with.

I'll be back when I have my schema set up. I'm sure Kriptas will already have you well on your way once you supply the simplified query and the results of his suggestion.

Good evening all!
Harry
Re: Need to improve query performance [message #349735 is a reply to message #349657] Mon, 22 September 2008 21:17
harrysmall3
Messages: 109
Registered: April 2008
Location: Massachusetts
Senior Member
One last thing - it would be easier if you could provide create scripts for the tables as well. Thanks - H
Re: Need to improve query performance [message #349739 is a reply to message #349657] Mon, 22 September 2008 22:49
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Run this query:
CREATE TABLE TMP_XXX_1 as
SELECT TL_TRC_ED.TRC TRC1, MAX(TL_TRC_ED.EFFDT) MAX1
FROM PS_TL_TRC_TBL TL_TRC_ED 
INNER JOIN PS_TL_PAYABLE_TIME TL_PAYABLE_TIME ON TL_TRC_ED.TRC = TL_PAYABLE_TIME.TRC
                                                 AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR
GROUP BY TL_TRC_ED.TRC;

TRUNCATE TABLE TMP_XXX_1;
DROP TABLE TMP_XXX_1; 


then show me the execution plan
and write down the execution time.
Re: Need to improve query performance [message #349798 is a reply to message #349669] Tue, 23 September 2008 02:55
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
You are a big fan of the unsupported assertion, aren't you, @Kriptas?

This is a beautiful example:
Quote:
5) Tell us which Oracle version you are using, because in Oracle, subselects in the WHERE clause such as

AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT)
FROM PS_TL_TRC_TBL TL_TRC_ED
WHERE TL_TRC.TRC = TL_TRC_ED.TRC
AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )


that refer to two tables are very slow; MS SQL Server deals with them in no time.


I have never found this to be the case - subselects are not very slow unless you've forgotten to index them, in which case no DB on the planet can do anything to help you.

Does it really strike you as believable that, with all the effort Oracle has put into the Cost-Based Optimizer, they'd ignore something as fundamental as sub-query performance?

I put it to you that it seems slower in Oracle because in SQL Server you would never dream of running a query this complex - instead you'd start by creating and populating a set of temporary tables for the sub-queries, and run the final query against them.
Thus, while the final execution of the query seems to take less time, this is only true if you ignore all the time it took to set things up in the first place.


It is certainly the subqueries that are going to be the problem here, but that's because you're running 11 separate subqueries.

Are they all indexed? Ideally, you'd want an index on each of the sub-query tables that contains the join columns used and the field returned - that way you could get away with an index range scan for each one, with no associated table access (see the sketch below for one example).
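For instance, a hedged sketch for the PS_TL_TRC_TBL subquery, assuming no suitable index already exists (the index name is made up; the columns come straight from the subquery's correlation column and the value it returns):

-- Covers the MAX(EFFDT) lookup: correlation column TRC plus the returned EFFDT
CREATE INDEX ps_tl_trc_tbl_tune_ix ON ps_tl_trc_tbl (trc, effdt);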
Re: Need to improve query performance [message #349810 is a reply to message #349657] Tue, 23 September 2008 03:42
Kriptas
Messages: 118
Registered: June 2008
Senior Member
JRowbottom,

I have tested such query performance, and I can say that Oracle has a lot of limitations with that query type (just the type of subselect I wrote down in my post).
MS SQL Server deals with such queries about 100 times faster.

Oracle 10g can't use such a subselect-dependent join if it is an outer join, etc.

And another thing: I have made more complex queries in MS SQL and they ran fine.
And when I load temporary tables to run some queries, I always include the temporary-table load times in the total query time, and that total is usually a lot less than the Oracle execution time with the huge subqueries.


So, if you have nothing to compare against, then the turtle is the fastest animal in the world.

[Updated on: Tue, 23 September 2008 04:41]


Re: Need to improve query performance [message #349869 is a reply to message #349810] Tue, 23 September 2008 06:29
rajatratewal
Messages: 507
Registered: March 2008
Location: INDIA
Senior Member
Quote:

I have tested such query performance, and I can say that Oracle has a lot of limitations with that query type (just the type of subselect I wrote down in my post).
MS SQL Server deals with such queries about 100 times faster.


Kriptas, no two databases are the same.

What applies to one cannot be assumed to apply to the other, because they
are internally different.

You can only compare them if you know how their internals interpret the query.

Regards,
Rajat

Re: Need to improve query performance [message #349872 is a reply to message #349810] Tue, 23 September 2008 06:33
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
My mileage on this matter seems to vary significantly from yours.
I ran this little test on my out-of-the-box 10g database, running on my laptop (1.8 GHz, 1 GB RAM):
drop table test_0105;

drop table test_0106;

create table test_0105  (col_1  number, col_2 number);

create table test_0106 (col_1 number,col_3 number);

insert into test_0105 select level,mod(level,1000) from dual connect by level <=1000000;

insert into test_0106 select level, mod(level,100) from dual connect by level <= 1000000;

create index test_0106_idx on test_0106(col_3,col_1);

select count(*)
 from   test_0105 t5
 where  t5.col_1 <= (select max(col_1)
                     from   test_0106 t6
                     where  t6.col_3 = t5.col_2);


It took about 1.4 seconds the first time, and an average of 0.8 seconds thereafter.

I then repeated the test on my out-of-the-box SQL Server Express 2005 database (same laptop).
Table Definitions
CREATE TABLE [dbo].[Test_1](
	[col_1] [numeric](18, 0) NULL,
	[col_2] [numeric](18, 0) NULL
) ON [PRIMARY];

CREATE TABLE [dbo].[Test_2](
	[col_1] [numeric](18, 0) NULL,
	[col_3] [numeric](18, 0) NULL
) ON [PRIMARY];

CREATE NONCLUSTERED INDEX [test_2_idx] ON [dbo].[Test_2] 
(
	[col_3] ASC,
	[col_1] ASC
)WITH (PAD_INDEX  = OFF, SORT_IN_TEMPDB = OFF, DROP_EXISTING = OFF, IGNORE_DUP_KEY = OFF, ONLINE = OFF) ON [PRIMARY];


create table numbers (num numeric(18,0));

insert into numbers(num)
select rank() over (order by object_id,column_id) 
from sys.all_columns;


select (n1.num-1)*1000+(n2.num-1) totnum
from   numbers n1, numbers n2
where  n1.num<= 1000
and    n2.num<= 1000
order by totnum;

delete test_1;

delete test_2;

insert into test_1 (col_1,col_2)
select (n1.num-1)*1000+(n2.num-1),n1.num-1
from   numbers n1, numbers n2
where  n1.num<= 1000
and    n2.num<= 1000;

insert into test_2 (col_1,col_3)
select (n1.num-1)*1000+(n2.num-1),n2.num-1
from   numbers n1, numbers n2
where  n1.num<= 1000
and    n2.num<= 1000;

select count(*)
from   test_1 t1
where  t1.col_1 <= (select max(col_1)
                    from   test_2 t2
                    where  t2.col_3 = t1.col_2);


The SQL Server implementation, which you would have us believe should take on the order of 10 milliseconds,
took 28 seconds the first time I ran it, and an average of 7-8 seconds for subsequent runs.

I put it to you that you have some work to do if you wish to defend your claim that SQL Server will perform queries like this in 1% of the time that an Oracle database can.
Re: Need to improve query performance [message #349878 is a reply to message #349657] Tue, 23 September 2008 06:45
Kriptas
Messages: 118
Registered: June 2008
Senior Member
I do not say that two RDBMSs must be the same.
I just say that MS SQL Server deals with that kind of subquery normally, while Oracle has BIG trouble with that subselect.

For example:
create table tmp_01 as
select accnt_code, crnc_rate
from accounts
inner join crnc_rates on accnt_crnc=crnc_crnc
							  and crnc_date=(select max(crnc_date) from crnc_rates
									    where crnc_crnc=accnt_crnc and crnc_date<=accnt_open_dt)


Table accounts row count: 4,000,000.
Table crnc_rates row count: 150,000; it contains currency rate changes and the currency value at each change time.

All statistics are gathered.

With that query I need to get the account's currency rate on the account open day.
crnc_rates is ordered by crnc_crnc ASC, crnc_date DESC and indexed with an index on those two columns (crnc_crnc ASC, crnc_date DESC).

That query runs in about 10-15 seconds in MS SQL Server.

The same query on Oracle 10g has already been running for 15 minutes and still has not finished.

The tables on MS SQL Server and Oracle 10g are 100% the same,
indexed with the same indexes.

The select provided by @RAJANKK4 contains a few subselects similar to the one I posted here.
I think the problem is in those subselects.

Oh, while I was writing this post, Oracle finished my select, and its run time was 1566.578 seconds! (Compare that to 15 seconds in MS SQL Server, and the Oracle server hardware is 2-5 times more powerful than the MS SQL Server machine.)
Re: Need to improve query performance [message #349892 is a reply to message #349657] Tue, 23 September 2008 07:11
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Dear @JRowbottom,
thank you for posting that example.
Using your script I created the tables, indexed one of them, and ran the query on Oracle;
its run time was 2 seconds the first time, and 1.1-1.4 seconds on later runs.

After that, I pumped the data from the Oracle tables created by your script into MS SQL Server (without indexing the tables)
and ran the query.
The run time showed as 00:00:00; how many milliseconds it took I do not know,
so it really is faster than Oracle.
And we can't really compare using SQL Server Express, because it is a whole different product line and may have a much weaker optimiser.
I use Oracle Enterprise and SQL Server Standard editions.

And @JRowbottom, when I said that Oracle has trouble with some subqueries, I did not mean the subquery type you posted.

Look at my posted subquery:
create table tmp_01 as
select accnt_code, crnc_rate
from accounts
inner join crnc_rates on accnt_crnc=crnc_crnc
                         and crnc_date=(select max(crnc_date) from crnc_rates
                                        where crnc_crnc=accnt_crnc and crnc_date<=accnt_open_dt)


If I delete the WHERE-clause predicate "and crnc_date<=accnt_open_dt" from the subselect, the query will be fast: its run time will be 20 seconds. But if that predicate is present, the query run time will be 1500 seconds.

Try it and you will see.
And by the way, Oracle can't join that subselect with a LEFT OUTER JOIN.

[Updated on: Tue, 23 September 2008 08:07]


Re: Need to improve query performance [message #349931 is a reply to message #349692] Tue, 23 September 2008 09:13
RAJANKK4
Messages: 4
Registered: September 2008
Junior Member
Hi Kriptas,

I have an idea to break the cumbersome query into two statements. The first one will select data from PS_TL_PAYABLE_TIME and PS_JOB without the outer joins and create a data set. In place of the descriptions (and the other fields from the outer-joined tables), I will populate 0 or blank.

Then, in the second query, I will update the descriptions from the outer-joined tables into the data set. A sample of the queries is attached.

Also I am wondering whether I can use the MERGE statement instead of UPDATE in query 2. Will that work?

Please let me know if the logic looks ok.

Rajan
  • Attachment: Break Up.txt
    (Size: 5.05KB, Downloaded 1415 times)
Re: Need to improve query performance [message #349935 is a reply to message #349657] Tue, 23 September 2008 09:32
Kriptas
Messages: 118
Registered: June 2008
Senior Member
I do not think so; UPDATE is a slow operation, and the update you posted there must be very slow.
If you want to update one table with data from another table, and the tables are big, use a MERGE statement; it updates tables more quickly.
Re: Need to improve query performance [message #349941 is a reply to message #349935] Tue, 23 September 2008 09:41
RAJANKK4
Messages: 4
Registered: September 2008
Junior Member
You are right. The UPDATE is very slow.

I tried MERGE instead of UPDATE, just for comparison and syntax, but I am not sure whether it is OK syntactically, because I want to update one target table and the source is a join of two tables.

Can you take a look and point out whether the MERGE is OK? Query attached.

Rajan
  • Attachment: merge.txt
    (Size: 0.45KB, Downloaded 1378 times)
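For reference, a hedged sketch of the general shape such a MERGE can take when the source is a join of two tables (this is not the attached query; TARGET_TBL, SRC_A and SRC_B are placeholder names):

MERGE INTO target_tbl t
USING (SELECT a.emplid, a.empl_rcd, b.descr
       FROM   src_a a
       JOIN   src_b b ON b.some_key = a.some_key) s
ON (t.emplid = s.emplid AND t.empl_rcd = s.empl_rcd)
WHEN MATCHED THEN UPDATE SET t.descr = s.descr;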
Re: Need to improve query performance [message #349942 is a reply to message #349892] Tue, 23 September 2008 09:49
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
In general:
Don't post anecdotes - post test cases. As the phrase goes, the plural of anecdote is not data.
Generate a set of test data that reproduces the problem you are having, and someone here will show you where you have gone wrong.


Re the test case:
You seem to be saying that you've got a machine that will run the SQL Server test case at least an order of magnitude faster than mine, and yet it can't match my performance on the Oracle test.
I remember from some of your earlier posts that your production Oracle database seemed to have some major configuration problems - it was slower than my laptop then, if memory serves.
Would I be right in assuming that you are still having those problems and that this is a symptom of them?
Re: Need to improve query performance [message #349950 is a reply to message #349935] Tue, 23 September 2008 10:17
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
Quote:
If you want to update one table with data from another table, and the tables are big, use a MERGE statement; it updates tables more quickly.


@Kriptas: If you're going to make blanket pronouncements about whether one thing is faster than another, you might want to provide a little evidence.
As you'll see from the results below, there's nothing in it:
create table test_0109 (col_1 number, col_2 number);

create table test_0110 (col_1 number, col_2 number);

insert into test_0109 select level,null from dual connect by level <=10000;

insert into test_0110 select level, mod(level,100) from dual connect by level <= 10000;

create index test_0110_idx on test_0110(col_1,col_2);

declare
  v_iter   pls_integer := 10;
  v_time   pls_integer; 
begin
  v_time := dbms_utility.get_time;
  
  for i in 1..v_iter loop
    update test_0109 t9
    set    col_2 = (select col_2 
                    from   test_0110 t0
                    where  t9.col_1 = t0.col_1);
  end loop;
  
  dbms_output.put_line('Update '||to_char(dbms_utility.get_time-v_time));
  
  v_time := dbms_utility.get_time;
  
  for i in 1..v_iter loop
    merge into test_0109 t9
    using test_0110 t0
    on (t0.col_1 = t9.col_1)
    when matched then update set t9.col_2 = t0.col_2;
  end loop;
  
  dbms_output.put_line('Merge '||to_char(dbms_utility.get_time-v_time));  
end;
/

Results
set serveroutput on
Update 753
Merge 727

Update 525
Merge 551

Update 365
Merge 297

Update 461
Merge 652


Even repeating the tests with 10x as many rows shows no conclusive advantage one way or the other:
Update 3323
Merge 2750

Update 3002
Merge 3365


I'll grant you that it performs better than any other UPDATE/INSERT combination when you are doing conditional upserts (sketched below), but when you're just updating, it's just the same as UPDATE.
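For readers following the thread, a hedged sketch of the conditional-upsert case referred to above (a generic example against the same test tables; it was not part of the timing runs):

-- Update rows that already exist, insert the ones that don't, in a single pass
merge into test_0109 t9
using test_0110 t0
on (t0.col_1 = t9.col_1)
when matched then update set t9.col_2 = t0.col_2
when not matched then insert (col_1, col_2) values (t0.col_1, t0.col_2);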
Re: Need to improve query performance [message #349972 is a reply to message #349657] Tue, 23 September 2008 11:25
Kriptas
Messages: 118
Registered: June 2008
Senior Member
@JRowbottom, I can say that you are a good opponent and it is a pleasure to have this conversation with you. You provide real examples, but examples can differ.

So I have prepared an example from my situation, where Oracle performance is very poor.

drop table gt_test_0105;

drop table gt_test_0106;

create table gt_test_0105  (col_1  number, col_2 number, date_1 date);

create table gt_test_0106 (col_1 number,col_3 number, date_2 date);

insert into gt_test_0105 select level,mod(level,10), sysdate date_1 from dual connect by level <=1000000;

insert into gt_test_0106 select level, mod(level,10), sysdate-(1+ABS(MOD(dbms_random.random,1000))) date_2 from dual connect by level <= 100000;

create index gt_test_0106_idx on gt_test_0106(col_3 , date_2 );                     




select count(*) 
from gt_test_0105 t5 
inner join gt_test_0106 t6 on t6.col_3=t5.col_2
                              and date_2=(select max(t7.date_2) 
                                          from gt_test_0106 t7
                                          where t7.col_3= t5.col_2 
                                                and t7.date_2<=t5.date_1)

 


It returned a count of roughly 10,000,000 for me.

On MS SQL Server Standard Edition the query runs in 1 second (tables gt_test_0105 and gt_test_0106 have no indexes),
and in about the same amount of time when table gt_test_0106 is indexed with the same index as in Oracle, although the execution plans are different.

In Oracle... it will take a while; I'm still waiting for it to finish (it has already been running for about 5 minutes, and I think it will take about 20 minutes or more).

You can test it.

Regarding the servers: I do not know why, but I think the Oracle server does not perform at 100%, although it is running a little bit faster now.
But there is still a HUGE problem selecting rows from a table where the index has a big clustering factor.

And about the MERGE statement: I'll test your examples and provide my test cases.

Thanks again for the good conversation.
Re: Need to improve query performance [message #349995 is a reply to message #349657] Tue, 23 September 2008 12:33
Kriptas
Messages: 118
Registered: June 2008
Senior Member
@JRowbottom, about the MERGE statement:
it is wrong to use a loop for an index-based query, because after the first run the index will be cached and the other iterations will be fast. So if you are using a loop, please flush the buffer cache first (one way to do that is sketched below).
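A hedged sketch of flushing the caches between timing runs on 10g (it needs a suitably privileged account and is only sensible on a test instance):

ALTER SYSTEM FLUSH BUFFER_CACHE;
ALTER SYSTEM FLUSH SHARED_POOL;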

Here is a remake of your provided script, but you need to run it as a whole only once; to repeat it, drop the tables, reload the data, and rebuild the index again.

drop table gt_test_0109;

drop table gt_test_0110;

create table gt_test_0109 (col_1 number, col_2 number);

create table gt_test_0110 (col_1 number, col_2 number);

insert into gt_test_0109 select level,null from dual connect by level <=100000;

insert into gt_test_0110 select level, mod(level,100) from dual connect by level <= 100000;

create index test_0110_idx on gt_test_0110(col_1,col_2);

declare
  v_iter   pls_integer := 1;
  v_time   pls_integer; 
begin
  v_time := dbms_utility.get_time;
  

  for i in 1..v_iter loop
    update gt_test_0109 t9
    set    col_2 = (select col_2 
                    from   gt_test_0110 t0
                    where  t9.col_1 = t0.col_1)
    --where col_2 is null
    ;
  end loop;
  
  dbms_output.put_line('Update '||to_char(dbms_utility.get_time-v_time));
  
  v_time := dbms_utility.get_time;
  
  for i in 1..v_iter loop
    merge into gt_test_0109 t9
    using gt_test_0110 t0
    on (t0.col_1 = t9.col_1 /*and  t9.col_2 is null*/)
    when matched then update set t9.col_2 = t0.col_2;
  end loop;
  
  dbms_output.put_line('Merge '||to_char(dbms_utility.get_time-v_time));  
end;
/



In the script I use 10-times-larger tables and only one loop iteration, so the indexes are not cached, etc.

results are
Update 1585
Merge 160


I do not think there are many queries where you need to update the same table with the same values 10 times; you always need to do the job in one pass, so let's measure only one loop iteration.
And the results show that MERGE is 10 times faster.

MERGE uses a HASH JOIN of the two tables and then updates one of them (it is like updating a view, but with fewer limitations).
A simple UPDATE uses nested loops to find the rows that need to be updated. So with MERGE you do not need an index, while with a simple UPDATE you do.

Oh, by the way, the last query, with count(*) and MAX in the subselect, just finished in Oracle; its run time was 4399.516 seconds (MS SQL Server Standard did it in 1 second).

And I dropped the index test_0110_idx and ran the update script.
It has been running for 10 minutes now, and I think there will be a huge difference between the simple UPDATE and the MERGE update (MERGE will be ~100 times faster or more).


[Updated on: Tue, 23 September 2008 12:36]


Re: Need to improve query performance [message #350000 is a reply to message #349657] Tue, 23 September 2008 13:26
Kriptas
Messages: 118
Registered: June 2008
Senior Member
@RAJANKK4,
did you run the query I asked you to run, and post the execution plan and the run time?
Because with the updates you will not get good performance results.

The problem is in the subselects, and we need to find an approach.

Please write down a short select (selecting a few rows)
with a few joins, one of which contains a subselect,
for example one with this join predicate:
      AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT) 
                           FROM PS_TL_TRC_TBL TL_TRC_ED 
                           WHERE TL_TRC.TRC = TL_TRC_ED.TRC 
                                 AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR ) 


and post its execution plan and run time.
Then we will tune that small select and will know what to do next.

I do not have your data and can't tune the query without your help.

Re: Need to improve query performance [message #350203 is a reply to message #350000] Wed, 24 September 2008 06:32
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
Re Update / Merge

Thank you for the compliment.

You have overlooked two things in your single pass Update/Merge example -
1) The table gets cached, as well as the index
2) Merge uses the index too, and gets to benefit if the index is cached.

Thus whichever process goes first in the loop gets the handicap of doing the most physical reads the first time it runs.

If you re-run your test and put the Merge and the update tests the other way round, you'll see what a difference it makes.

When I did this test in these two fashions, I got these results:
Merge First
Merge 1239
Update 422

Merge 1244
Update 433

Update First
Update 1793
Merge 319

Update 1399
Merge 325
Re: Need to improve query performance [message #350205 is a reply to message #349972] Wed, 24 September 2008 06:42
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
Re Subqueries:

Here's a subquery test case. Now, the figures aren't conclusive, as my laptop is groaning under the weight of applications, and (having only a single disk) I'm getting all sorts of I/O wait problems with the Oracle DB.
I modelled this on the problem you described as best I could.
I cut the number of rows down to 4,000, because I've only got a limited amount of patience.

Results
             Run 1   Run 2
Oracle       136s    146s
SQL Server   495s    511s

Those performance figures you quote for the SQL Server DB are staggering.

Questions:
1) What sort of box is it running on?
2) Is the Oracle database running on the same box?
3) What execution plan is it using?
4) What happens if you replace the count(*) with count(col_1||col_4)?

Oracle Code

create table test_0107 (col_1  number, col_2  number,col_3  date);

create table test_0108 (col_2 number, col_3 date, col_4 number);


insert into test_0107 select level, mod(level,10)+1,sysdate from dual connect by level <=4000;

insert into test_0108 select l1.lvl,trunc(sysdate)+10-l2.lvl, l1.lvl + l2.lvl 
                      from (select level lvl from dual connect by level <= 10) l1
                          ,(select level lvl from dual connect by level <= 75000) l2;

create index test_0108_idx2 on test_0108(col_3 desc,col_2 ,col_4);

create index test_0108_idx  on test_0108(col_2,col_3 desc,col_4);

begin
  dbms_stats.gather_table_stats(ownname => null
                               ,tabname => 'TEST_0107'
                               ,method_opt => 'FOR ALL COLUMNS'
                               ,cascade => true);

  dbms_stats.gather_table_stats(ownname => null
                               ,tabname => 'TEST_0108'
                               ,method_opt => 'FOR ALL COLUMNS'
                               ,cascade => true);
end;
/
                   
select count(col_4)
from   test_0107 t7
      ,test_0108 t8
where  t7.col_2 = t8.col_2
and    t8.col_3 = (select max(t81.col_3)
                   from   test_0108 t81
                   where  t7.col_2 = t81.col_2
                   and    t8.col_2 = t81.col_2
                   and    t81.col_3 <= t7.col_3
                   );
SQL Server Code
===============
The Numbers table is the same as the previous test

create table big_numbers (num numeric(10,0));

insert into big_numbers
select (n1.num-1)*1000 + n2.num
from numbers n1,numbers n2
where n1.num <=1000
and n2.num <=1000;

create table test_03 (col_1 numeric(10,0), col_2 numeric(10,0),col_3 datetime);

create table test_04 (col_2 numeric(10,0), col_3 datetime, col_4 numeric(10,0));

create index test_04_idx2 on test_04(col_3 desc,col_2 ,col_4);

create index test_04_idx on test_04(col_2,col_3 desc,col_4);

insert into test_03
select n1.num*10+n2.num, n2.num,getdate()
from numbers n1
,numbers n2
where n1.num <=400
and n2.num <=10;

insert into test_04
select n1.num,getdate()+10-n2.num, n1.num + n2.num
from numbers n1
,big_numbers n2
where n1.num <= 10
and n2.num <= 75000;

select count(col_4)
from test_03 t3
,test_04 t4
where t3.col_2 = t4.col_2
and t4.col_3 = (select max(t41.col_3)
from test_04 t41
where t3.col_2 = t41.col_2
and t4.col_2 = t41.col_2
and t41.col_3 <= t3.col_3
);
Re: Need to improve query performance [message #350210 is a reply to message #349657] Wed, 24 September 2008 07:03
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Yep, I tried, and my results are similar to yours. But try using MERGE and a simple UPDATE without indexes, and you will see what happens.

I tested it yesterday, so there will be a huge difference;
I do not have the results saved, but you can try it.

Do not forget one more thing: in your test case the indexes are fresh and the clustering factor is minimal. Such ideal test cases for indexes are not real-world cases.

Do you agree that a hash join is in many cases faster than nested loops when joining two tables with a row count of about 1M?

In my test case the MERGE statement uses a hash join, and the simple UPDATE uses nested loops.





Re: Need to improve query performance [message #350219 is a reply to message #349657] Wed, 24 September 2008 07:47
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Yep, in that context MS SQL Server works slower,
but the reason for that is a wrongly written query.
Your query:

select count(col_4)
from   test_0107 t7
      ,test_0108 t8
where  t7.col_2 = t8.col_2
and    t8.col_3 = (select max(t81.col_3)
                   from   test_0108 t81
                   where  t7.col_2 = t81.col_2
                   and    t8.col_2 = t81.col_2
                   and    t81.col_3 <= t7.col_3
                   );


Why do you need this part in the subselect?
where  t7.col_2 = t81.col_2
       and    t8.col_2 = t81.col_2


This is a redundant predicate; t7.col_2 will always be equal to t8.col_2 (that follows from the first join predicate, t7.col_2 = t8.col_2),
so you only need one of them.

Use this query; the result set will be the same:

select count(col_4)
from   test_0107 t7
      ,test_0108 t8
where  t7.col_2 = t8.col_2
and    t8.col_3 = (select max(t81.col_3)
                   from   test_0108 t81
                   where  t7.col_2 = t81.col_2
                          and    t81.col_3 <= t7.col_3
                   );


and show the results now.
Re: Need to improve query performance [message #350246 is a reply to message #349657] Wed, 24 September 2008 08:36
Kriptas
Messages: 118
Registered: June 2008
Senior Member
And just try this query on the tables you created:
select count(1) 
from test_0108 t8
where t8.col_3=(select max(t81.col_3)
                   from   test_0108 t81
                   where  t8.col_2 = t81.col_2
                   and    t81.col_3 < t8.col_3
                   );


With normally written queries MS SQL beats Oracle badly.
In your last select you used redundant predicates, but you did not write down the run-time results of the queries I posted as examples.

This query is very simple, I'd say; MS SQL does it in 1 second, while Oracle... oh, Oracle... I'm still waiting for it to finish its job.

Try this query and post the run-time results on MS SQL and Oracle,

but with no cheating: just this query, with the same tables from the data-loading script you posted in your last post.


[Updated on: Wed, 24 September 2008 08:47]


Re: Need to improve query performance [message #350261 is a reply to message #350219] Wed, 24 September 2008 09:13
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
Well cover me in fish paste and leave me out for the Deep Ones.

I apologise unreservedly. SQL Server runs that query nearly 3 orders of magnitude faster than Oracle does - I upped the number of rows in the SQL Server test to 40,000 and it took 14 seconds.

Can you explain what it's doing behind the scenes? I'd love to know.

I also apologise for the delay in replying - for the sake of my pride, I thought I'd see if there was a way of rewriting the query to improve the performance, and there is:

This query here, which returns the same set of data, runs on a 4,000,000-row data set in 8.5 seconds:

select count(col_4)
from (
select t7.col_1
      ,t7.col_2
      ,(select distinct first_value(t8.col_4) over (order by t8.col_3 desc)
        from   test_0108 t8
        where  t8.col_2 = t7.col_2
        and    t8.col_3 <= t7.col_3) col_4
from test_0107 t7);   


However, this does not alter the fact that you are right, and SQL Server does run that query much faster than Oracle does.

[Corrected query - performance figures unchanged]

[Updated on: Wed, 24 September 2008 09:23]


Re: Need to improve query performance [message #350275 is a reply to message #349657] Wed, 24 September 2008 10:08
Kriptas
Messages: 118
Registered: June 2008
Senior Member
I do not understand your sentence:
"This query here, which returns the same set of data, runs on a 4,000,000-row data set in 8.5 seconds"

select count(col_4)
from (
select t7.col_1
      ,t7.col_2
      ,(select distinct first_value(t8.col_4) over (order by t8.col_3 desc)
        from   test_0108 t8
        where  t8.col_2 = t7.col_2
        and    t8.col_3 <= t7.col_3) col_4
from test_0107 t7);   



I tried it at home now, and it runs on the same tables in 2.5 seconds.

For table creation your script was used:
insert into test_0107 select level, mod(level,10)+1,sysdate from dual connect by level <=4000;


The query you provided does not have the same purpose as mine, but if you know what data you need in the output record set, you can use workarounds.

The best workaround we found for the same query is:
select count(col_4)
from (
select t7.col_1
      ,t7.col_2
      ,(select t8.col_4 from test_0108 t8
        where t8.col_2 = t7.col_2
              and t8.col_3= (select max(t81.col_3) 
                             from test_0108 t81 
                             where  t81.col_2 = t7.col_2
                                    and t81.col_3<=t7.col_3)) col_4
from test_0107 t7); 


If we need to, we can put that inline query into a function, but the use of any user-defined function (UDF) hits performance by roughly 10x in both MS SQL Server and Oracle. In Oracle, however, you can use an option in the function creation header so that the performance hit will be minimal.



Re: Need to improve query performance [message #350280 is a reply to message #349657] Wed, 24 September 2008 10:19
Kriptas
Messages: 118
Registered: June 2008
Senior Member
And about MS SQL Server:
it has good query-rewrite algorithms, and what it does behind the scenes I do not know.
You can see in the execution plan that it uses eager index spools, lazy index spools, online bitmapping, etc.

But at the moment I am seeking an answer to why MS SQL Server uses indexes 10,000 times faster than Oracle in some cases.
If the index clustering factor is huge, Oracle uses the indexes very slowly, and that is a big problem for me.
As I wrote in my first post on this forum, the same query on MS SQL Server on the same data runs in 4 minutes, while the same query, on the same data, on Oracle runs for 3 hours.
After table reorganisation and clustering-factor minimisation, the query on Oracle runs in ~30 seconds, and on MS SQL Server it runs in 15 seconds.
So I can't understand why there is such a big performance hit on Oracle if the table has a big clustering factor
(the clustering factor is the same on the MS SQL Server machine).


I tried to generate the test data, but I can't get the same clustering factor.

Table reorganisation on the Oracle database is not the answer; it takes too much time. We would need to periodically reorganise the whole database, and that is impossible.
And what do we do with tables where several columns are used in filtering/join predicates? If I reorganise the table by one column, the result will be negative for the other column.
Re: Need to improve query performance [message #350283 is a reply to message #350210] Wed, 24 September 2008 10:28
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
True - if there is no index, the UPDATE will perform poorly.
However, if the number of rows to be updated would make the hash join too large for your hash area size, then the performance of the MERGE will go downhill very quickly.

Whether or not the index has a high clustering factor doesn't make much of a difference. I ran an additional set of tests on a table populated with this command:
insert into gt_test_0111 select to_number(reverse(to_char(level,'000000'))), mod(level,100) from dual connect by level < 100000;
to ensure that the clustering factor would be as high as possible, and the results show no marked performance drop for the clustered index.

Also, if you need to update a column conditionally, then Merge is not the tool of choice:
Populate Gt_Test_0109 like this:
insert into gt_test_0109 select level,case when mod(level,100)=1 then null else 1 end from dual connect by level <100000;
and run the tests like this:
declare
  v_iter   pls_integer := 1;
  v_time   pls_integer; 
begin
  -- Pre cache table
  for i in 1..v_iter loop
    update gt_test_0109 t9
    set    col_2 = (select col_2 
                    from   gt_test_0110 t0
                    where  t9.col_1 = t0.col_1)
    ;
  end loop;
  
  rollback;


  v_time := dbms_utility.get_time;
  
  for i in 1..v_iter loop
    merge into gt_test_0109 t9
    using (select * from gt_test_0110 where col_2 is null) t0
    on (t0.col_1 = t9.col_1)
    when matched then update set t9.col_2 = t0.col_2;
  end loop;
  
  dbms_output.put_line('Merge '||to_char(dbms_utility.get_time-v_time));
  
  rollback;
  
  v_time := dbms_utility.get_time;

  for i in 1..v_iter loop
    update gt_test_0109 t9
    set    col_2 = (select col_2 
                    from   gt_test_0110 t0
                    where  t9.col_1 = t0.col_1)
    where col_2 is null;
  end loop;
 
  dbms_output.put_line('Update '||to_char(dbms_utility.get_time-v_time));

end;
/
and I get these results:
Merge 77
Update 9

Merge 53
Update 11

Merge 67
Update 15


Now, if you use 10x as many rows, then MERGE starts to pull ahead again. They seem to break even somewhere around the 300,000-row mark on my machine.

I would say (in conclusion) that neither one is consistently better than the other in all cases.
Re: Need to improve query performance [message #350291 is a reply to message #349657] Wed, 24 September 2008 10:39
Kriptas
Messages: 118
Registered: June 2008
Senior Member
I think you wrote the MERGE statement wrongly in your last post.
MERGE performs better if the conditions are listed after ON, in the join predicate list:

    merge into gt_test_0109 t9
    using gt_test_0110 t0
    on (t0.col_1 = t9.col_1 AND t0.col_2 is null)
    when matched then update set t9.col_2 = t0.col_2;



I have not tried that query yet, because my Oracle SQL Developer hung (damn Java),
and I'm waiting until it is operational again.

And one more thing: in your script, in the MERGE statement you filter on t0.col_2 is null, while in the simple UPDATE statement you filter on t9.col_2 is null.


And I can't run the query I wrote down at home; SQL Developer shows me an error saying that columns referenced in the ON clause cannot be updated.
But at work I used that statement (see my previous posts, where I put my version of the script; the predicate t0.col_2 is null is commented out there, and the results there were better for the MERGE statement... but there were other mistakes with table pre-caching, so I may now see different results).

And one more thing: a simple UPDATE and a MERGE statement both behave like a
join.
If you need a few rows from the joined table, then nested loops will be faster; if you need lots of rows, then a hash join will be faster.
Simple update = nested loops.
Merge update = hash join.
I'm working with large tables, so in 90% of my work I use only hash joins.

[Updated on: Wed, 24 September 2008 11:07]


Re: Need to improve query performance [message #350298 is a reply to message #350291] Wed, 24 September 2008 11:04
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
Well spotted with the MERGE.
The "correct" MERGE statement would be:
merge into gt_test_0109 t9
    using gt_test_0110 t0
    on (t0.col_1 = t9.col_1 AND t9.col_2 is null)
    when matched then update set t9.col_2 = t0.col_2;
as the test is to update gt_test_0109 only where gt_test_0109.col_2 is null.

If you try that merge, you'll find it errors - you're not allowed to update any of the columns in the ON clause.
I'm not certain you actually can use MERGE to do that sort of update without a view and an Instead Of trigger.
I'll try and look at that tomorrow.
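One thing that might be worth trying here (a hedged sketch only, not verified against this exact case) is the optional WHERE clause that 10g allows on the update action, which keeps COL_2 out of the ON clause:

merge into gt_test_0109 t9
using gt_test_0110 t0
on (t0.col_1 = t9.col_1)
when matched then update set t9.col_2 = t0.col_2
  where t9.col_2 is null;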

[rewrite to add more details]

[Updated on: Wed, 24 September 2008 11:15]


Re: Need to improve query performance [message #350301 is a reply to message #349657] Wed, 24 September 2008 11:13
Kriptas
Messages: 118
Registered: June 2008
Senior Member
Yep, I found the same, but yesterday at my work it worked.
I'll try it tomorrow at work again.
Interesting case.
Maybe it is possible in the Oracle Enterprise edition and restricted in Oracle Personal Edition?

[Updated on: Wed, 24 September 2008 11:17]


Re: Need to improve query performance [message #350327 is a reply to message #349657] Wed, 24 September 2008 14:34
Kriptas
Messages: 118
Registered: June 2008
Senior Member
@RAJANKK4,
I made a new query. I do not know whether it will be faster than yours, but please try it and let me know the results.
One column I do not select at the moment; I need to know whether this query performs better, and if so I will finish it.

Are there indexes on the tables accessed with the 'MAX' subqueries?
And if so, which indexes?

SELECT  TL_PAYABLE_TIME.EMPLID 
      , TL_PAYABLE_TIME.EMPL_RCD 
      , TL_PAYABLE_TIME.DUR 
      , TL_PAYABLE_TIME.SEQ_NBR 
      , PERSONAL_DATA.NAME                       
      , JOB.BARG_UNIT                            
      , TL_PAYABLE_TIME.TRC 
-----------------------------------------------------------------------------------------------------------
--    , TL_TRC.DESCR AS OH_TRC_DESCR             
      ,( SELECT TL_TRC.DESCR
         FROM PS_TL_TRC_TBL TL_TRC
         WHERE     TL_TRC.TRC = TL_PAYABLE_TIME.TRC
               AND TL_TRC.EFFDT = ( SELECT MAX(TL_TRC_ED.EFFDT) 
                                    FROM PS_TL_TRC_TBL TL_TRC_ED 
                                    WHERE     TL_TRC_ED.TRC = TL_PAYABLE_TIME.TRC 
                                          AND TL_TRC_ED.EFFDT <= TL_PAYABLE_TIME.DUR )) AS OH_TRC_DESCR 
-----------------------------------------------------------------------------------------------------------
--    , TL_ERNCD.TL_ERNCD 
      ,( SELECT TL_ERNCD.TL_ERNCD
         FROM PS_OH_S_TL_ERNCD TL_ERNCD
         WHERE     TL_ERNCD.TRC = TL_PAYABLE_TIME.TRC
               AND TL_ERNCD.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM 
               AND TL_ERNCD.EFFDT = ( SELECT MAX(TL_ERNCD_ED.EFFDT) 
                                      FROM PS_OH_S_TL_ERNCD TL_ERNCD_ED 
                                      WHERE     TL_ERNCD_ED.TRC = TL_PAYABLE_TIME.TRC
                                           AND  TL_ERNCD_ED.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM
                                           AND TL_ERNCD_ED.EFFDT <= TL_PAYABLE_TIME.DUR )) AS TL_ERNCD
      , TL_PAYABLE_TIME.TL_QUANTITY 
-----------------------------------------------------------------------------------------------------------
--    , SCH_ASSIGN.SCHEDULE_ID                      
      ,( SELECT SCH_ASSIGN.SCHEDULE_ID
         FROM PS_OH_S_SCH_ASSIGN SCH_ASSIGN
         WHERE     SCH_ASSIGN.EMPLID = TL_PAYABLE_TIME.EMPLID
               AND SCH_ASSIGN.EMPL_RCD = TL_PAYABLE_TIME.EMPL_RCD
               AND SCH_ASSIGN.EFFDT = ( SELECT MAX(SCH_ASSIGN_ED.EFFDT) 
                                        FROM PS_OH_S_SCH_ASSIGN SCH_ASSIGN_ED 
                                        WHERE     SCH_ASSIGN_ED.EMPLID = TL_PAYABLE_TIME.EMPLID
                                              AND SCH_ASSIGN_ED.EMPL_RCD = TL_PAYABLE_TIME.EMPL_RCD
                                              AND SCH_ASSIGN_ED.EFFDT <= TL_PAYABLE_TIME.DUR 
                                              AND SCH_ASSIGN_ED.END_EFFDT >= TL_PAYABLE_TIME.DUR )) AS SCHEDULE_ID
-----------------------------------------------------------------------------------------------------------
--!!!!!      , SCH_CLND.SCHED_HRS                        !!!!  --------------------
      , NULL AS SCHED_HRS
      , TL_PAYABLE_TIME.PAYABLE_STATUS 
      , TL_PAYABLE_TIME.PAYROLL_REQ_NUM 
      , TL_PAYABLE_TIME.FROZEN_SW 
      , TL_PAYABLE_TIME.FROZEN_DATE 
      , TL_PAYABLE_TIME.PUBLISH_SW 
      , TL_PAYABLE_TIME.ACTUAL_PUB_DATE 
      , TL_PAYABLE_TIME.APPRV_PRCS_DTTM 
      , TL_PAYABLE_TIME.OPRID 
      , TL_PAYABLE_TIME.ORIG_SEQ_NBR 
      , TL_PAYABLE_TIME.OVERRIDE_RATE 
      , TL_PAYABLE_TIME.RECORD_ONLY_ADJ 
      , TL_PAYABLE_TIME.LBR_DIST_AMT 
      , TL_PAYABLE_TIME.EST_GROSS 
      , TL_PAYABLE_TIME.DILUTED_GROSS 
      , TL_PAYABLE_TIME.SOURCE_IND 
      , TL_PAYABLE_TIME.TASKGROUP 
      , TL_PAYABLE_TIME.BILLABLE_IND 
      , TL_PAYABLE_TIME.ACCT_CD 
      , TL_PAYABLE_TIME.ACCOUNT 
      , TL_PAYABLE_TIME.DEPTID_CF AS OH_CF_DEPTID 
      , TL_PAYABLE_TIME.PROJECT_ID 
      , TL_PAYABLE_TIME.FUND_CODE 
      , TL_PAYABLE_TIME.PROGRAM_CODE 
      , TL_PAYABLE_TIME.CLASS_FLD 
      , TL_PAYABLE_TIME.CHARTFIELD1 
      , TL_PAYABLE_TIME.CHARTFIELD2 
      , TL_PAYABLE_TIME.CHARTFIELD3 
      , TL_PAYABLE_TIME.BUDGET_REF 
      , JOB.COMPANY                            
      , TL_PAYABLE_TIME.BUSINESS_UNIT 
      , TL_PAYABLE_TIME.PRODUCT 
      , TL_PAYABLE_TIME.CUSTOMER 
      , TL_PAYABLE_TIME.BUSINESS_UNIT_PC 
      , TL_PAYABLE_TIME.BUSINESS_UNIT_PF 
      , TL_PAYABLE_TIME.SETID_ACTIVITY 
      , TL_PAYABLE_TIME.ACTIVITY_ID 
      , TL_PAYABLE_TIME.RESOURCE_TYPE 
      , TL_PAYABLE_TIME.SETID_RESOURCE 
      , TL_PAYABLE_TIME.RESOURCE_CATEGORY 
      , TL_PAYABLE_TIME.RESOURCE_SUB_CAT 
      , TL_PAYABLE_TIME.TASK 
      , TL_PAYABLE_TIME.USER_FIELD_1 
      , TL_PAYABLE_TIME.USER_FIELD_2 
      , TL_PAYABLE_TIME.USER_FIELD_3 
      , TL_PAYABLE_TIME.USER_FIELD_4 
      , TL_PAYABLE_TIME.USER_FIELD_5 
      , PERS_NID.NATIONAL_ID                   
      , JOB.SETID_DEPT AS SETID                
      , JOB.DEPTID                             
-----------------------------------------------------------------------------------------------------------
--      , DEPT.DESCR AS OH_DEPT_DESCR      
      ,( SELECT DEPT.DESCR
         FROM PS_DEPT_TBL DEPT
         WHERE     DEPT.DEPTID = JOB.DEPTID 
               AND DEPT.SETID = JOB.SETID_DEPT
               AND DEPT.EFFDT = ( SELECT MAX(DEPT_ED.EFFDT) 
                                  FROM PS_DEPT_TBL DEPT_ED 
                                  WHERE     DEPT_ED.DEPTID = JOB.DEPTID
                                        AND DEPT_ED.SETID = JOB.SETID_DEPT
                                        AND DEPT_ED.EFFDT <= JOB.EFFDT )) AS OH_DEPT_DESCR
      , JOB.POSITION_NBR                        
-----------------------------------------------------------------------------------------------------------
--    , POSITION_DATA.DESCR AS OH_POSITION_DESCR
      ,( SELECT POSITION_DATA.DESCR 
         FROM PS_S_POSITION_DATA POSITION_DATA
         WHERE     POSITION_DATA.POSITION_NBR = JOB.POSITION_NBR
               AND POSITION_DATA.EFFDT = (SELECT MAX(POSITION_DATA_ED.EFFDT) 
                                          FROM PS_S_POSITION_DATA POSITION_DATA_ED 
                                          WHERE     POSITION_DATA_ED.POSITION_NBR = JOB.POSITION_NBR
                                                AND POSITION_DATA_ED.EFFDT <= JOB.EFFDT )) AS OH_POSITION_DESCR
      , JOB.JOBCODE                         
-----------------------------------------------------------------------------------------------------------
--      , JOBCODE.DESCR AS OH_JOBCODE_DESCR 
      , ( SELECT JOBCODE.DESCR 
          FROM PS_JOBCODE_TBL JOBCODE
          WHERE     JOBCODE.JOBCODE = JOB.JOBCODE
                AND JOBCODE.SETID = JOB.SETID_JOBCODE
                AND JOBCODE.EFFDT = ( SELECT MAX(JOBCODE_ED.EFFDT) 
                                      FROM PS_JOBCODE_TBL JOBCODE_ED 
                                      WHERE     JOBCODE_ED.SETID = JOB.SETID_JOBCODE
                                            AND JOBCODE_ED.JOBCODE = JOB.JOBCODE
                                            AND JOBCODE_ED.EFFDT <= JOB.EFFDT )) AS OH_JOBCODE_DESCR
      , JOB.LOCATION                              
-----------------------------------------------------------------------------------------------------------
--    , LOCATION.DESCR AS OH_LOCATION_DESCR
      , ( SELECT LOCATION.DESCR
          FROM PS_LOCATION_TBL LOCATION
          WHERE     LOCATION.SETID = JOB.SETID_LOCATION 
                AND LOCATION.LOCATION = JOB.LOCATION
                AND LOCATION.EFFDT = ( SELECT MAX(LOCATION_ED.EFFDT) 
                                       FROM PS_LOCATION_TBL LOCATION_ED 
                                       WHERE     LOCATION_ED.SETID = JOB.SETID_LOCATION
                                             AND LOCATION_ED.LOCATION = JOB.LOCATION
                                             AND LOCATION_ED.EFFDT <= JOB.EFFDT )) AS OH_LOCATION_DESCR
      , JOB.UNION_CD 
-----------------------------------------------------------------------------------------------------------
--    , UNION_TBL.DESCR AS OH_UNION_DESCR
      ,( SELECT UNION_TBL.DESCR
         FROM PS_S_UNION_TBL UNION_TBL
         WHERE     UNION_TBL.UNION_CD = JOB.UNION_CD
               AND UNION_TBL.EFFDT = ( SELECT MAX(UNION_TBL_ED.EFFDT) 
                                       FROM PS_S_UNION_TBL UNION_TBL_ED 
                                       WHERE     UNION_TBL_ED.UNION_CD = JOB.UNION_CD
                                             AND UNION_TBL_ED.EFFDT <= JOB.EFFDT )) AS OH_UNION_DESCR

FROM  PS_TL_PAYABLE_TIME TL_PAYABLE_TIME 
INNER JOIN PS_PERSONAL_DATA PERSONAL_DATA   ON TL_PAYABLE_TIME.EMPLID = PERSONAL_DATA.EMPLID
INNER JOIN PS_PERS_NID PERS_NID             ON     TL_PAYABLE_TIME.EMPLID = PERS_NID.EMPLID 
                                               AND PERS_NID.COUNTRY = 'USA' 
                                               AND PERS_NID.NATIONAL_ID_TYPE = 'PR' 
INNER JOIN PS_JOB JOB                       ON      TL_PAYABLE_TIME.EMPLID = JOB.EMPLID 
                                                AND TL_PAYABLE_TIME.EMPL_RCD = JOB.EMPL_RCD 
                                                AND JOB.EFFDT = (  SELECT MAX(JOB_ED.EFFDT) 
                                                                   FROM PS_JOB JOB_ED 
                                                                   WHERE     JOB.EMPLID = JOB_ED.EMPLID 
                                                                         AND JOB.EMPL_RCD = JOB_ED.EMPL_RCD 
                                                                         AND JOB_ED.EFFDT <= TL_PAYABLE_TIME.DUR ) 
WHERE TL_PAYABLE_TIME.DUR >= TO_DATE('2008-01-01' , 'YYYY-MM-DD') 
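
If it helps to compare the plan for this version against the original, plain EXPLAIN PLAN / DBMS_XPLAN output is enough. This is a sketch only - the simple count below is just a placeholder, substitute the full statement above:

EXPLAIN PLAN FOR
SELECT COUNT(*)
FROM   PS_TL_PAYABLE_TIME TL_PAYABLE_TIME
WHERE  TL_PAYABLE_TIME.DUR >= TO_DATE('2008-01-01', 'YYYY-MM-DD');

-- display the plan that was just explained
SELECT * FROM TABLE(DBMS_XPLAN.DISPLAY);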
Re: Need to improve query performance [message #350392 is a reply to message #349657] Wed, 24 September 2008 22:37
harrysmall3
Messages: 109
Registered: April 2008
Location: Massachusetts
Senior Member
Guys, correct me if I am wrong, but this is how I am interpreting the aggregate part of the execution plan for the sub-selects.

Step #28 is the parent for the execution path 28 - 58, with each step having one child to return rows up, each child executing once for its parent, starting with step 58.

The results of step 28 are passed to the FILTER at the top of the plan, where records from the hash-join paths are filtered for final return to the top-level SELECT.

When trying to improve any query, I try to think of what the 'better execution plan' would be.

No doubt the hash join is enormous, and that is where the bulk of the time goes. I go back to my original statement on approaching this query: I still believe the improvement lies in creating an execution plan that filters out as many records as possible up front at each join step.

If I have read the query plan correctly, then I think this affirms my approach - none of the max/min values collected in the sub-selects are applied until the end of the execution path, against the enormous data set produced by the combined hash steps.

If the query could be modified so that the data retrieved along the 58 - 28 path were applied as filter steps at the innermost child steps of the hash paths, against the respective tables they address, then the bulk join steps would be significantly faster.

So what I need to verify right now is whether I am correct in saying that the data retrieved by the sub-selects is being applied as a filter to the final result of all of the joined data.

In this case the FILTER operation behaves like a nested loop: every record returned by step 2 is qualified for inclusion or exclusion by the series of lookups in 58 - 28.

That is what I would want to change.
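
For illustration only (not tested), something in this shape is what I mean, sketched against the PS_DEPT_TBL lookup and simplified to pick the row effective as of a fixed date rather than correlating on JOB.EFFDT:

SELECT JOB.EMPLID
     , JOB.DEPTID
     , DEPT.DESCR AS OH_DEPT_DESCR
FROM   PS_JOB JOB
JOIN ( SELECT D.SETID
            , D.DEPTID
            , D.DESCR
              -- number the rows so that RN = 1 is the latest effective-dated
              -- row per department, eliminated before the join takes place
            , ROW_NUMBER() OVER (PARTITION BY D.SETID, D.DEPTID
                                 ORDER BY D.EFFDT DESC) AS RN
       FROM   PS_DEPT_TBL D
       WHERE  D.EFFDT <= TO_DATE('2008-01-01', 'YYYY-MM-DD')
     ) DEPT
  ON   DEPT.SETID  = JOB.SETID_DEPT
  AND  DEPT.DEPTID = JOB.DEPTID
  AND  DEPT.RN     = 1;

The point is that the effective-date elimination happens on the small lookup set before the hash join, instead of in the FILTER at the very end.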


I'm trying to decompose the execution plan into blocks and work through it in a divide-and-conquer approach, but I need to know whether this premise is correct first.
Thx
Harry
Re: Need to improve query performance [message #350454 is a reply to message #350327] Thu, 25 September 2008 02:15
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
@RAJANKK4 - it's worth rewriting your query to try inline selects using analytic functions as well - they are fast, and they work well in cases like this:
SELECT  TL_PAYABLE_TIME.EMPLID 
      , TL_PAYABLE_TIME.EMPL_RCD 
      , TL_PAYABLE_TIME.DUR 
      , TL_PAYABLE_TIME.SEQ_NBR 
      , PERSONAL_DATA.NAME                       
      , JOB.BARG_UNIT                            
      , TL_PAYABLE_TIME.TRC 
-----------------------------------------------------------------------------------------------------------
--    , TL_TRC.DESCR AS OH_TRC_DESCR             
      ,( SELECT FIRST_VALUE(TL_TRC.DESCR) OVER (ORDER BY TL_TRC.EFFDT DESC)
         FROM   PS_TL_TRC_TBL   TL_TRC
         WHERE  TL_TRC.TRC   =  TL_PAYABLE_TIME.TRC
         AND    TL_TRC.EFFDT <= TL_PAYABLE_TIME.DUR ) AS OH_TRC_DESCR 
-----------------------------------------------------------------------------------------------------------
--    , TL_ERNCD.TL_ERNCD 
      ,( SELECT FIRST_VALUE(TL_ERNCD.TL_ERNCD) OVER (ORDER BY TL_ERNCD.EFFDT DESC)
         FROM   PS_OH_S_TL_ERNCD TL_ERNCD
         WHERE  TL_ERNCD.TRC = TL_PAYABLE_TIME.TRC
         AND    TL_ERNCD.PAY_SYSTEM = TL_PAYABLE_TIME.PAY_SYSTEM 
         AND TL_ERNCD.EFFDT  <= TL_PAYABLE_TIME.DUR ) AS TL_ERNCD
      , TL_PAYABLE_TIME.TL_QUANTITY 
Re: Need to improve query performance [message #350492 is a reply to message #349657] Thu, 25 September 2008 03:47
Kriptas
Messages: 118
Registered: June 2008
Senior Member
@JRowbottom - did you test those selects? Because without the analytic function my select runs about 3-5 times faster.


select count(col_4)
from (
select t7.col_1
      ,t7.col_2
      ,(select distinct first_value(t8.col_4) over (order by t8.col_3 desc)
        from   test_0108 t8
        where  t8.col_2 = t7.col_2
        and    t8.col_3 <= t7.col_3) col_4
from test_0107 t7);   


select count(col_4)
from (
select t7.col_1
      ,t7.col_2
      ,(select t8.col_4 from test_0108 t8
        where t8.col_2 = t7.col_2
              and t8.col_3= (select max(t81.col_3) 
                             from test_0108 t81 
                             where  t81.col_2 = t7.col_2
                                    and t81.col_3<=t7.col_3)) col_4
from test_0107 t7); 

Re: Need to improve query performance [message #350496 is a reply to message #349657] Thu, 25 September 2008 03:50
Kriptas
Messages: 118
Registered: June 2008
Senior Member
And in the main query, I think the biggest problem is with this join sub-select:
INNER JOIN PS_JOB JOB                       ON      TL_PAYABLE_TIME.EMPLID = JOB.EMPLID 
                                                AND TL_PAYABLE_TIME.EMPL_RCD = JOB.EMPL_RCD 
                                                AND JOB.EFFDT = (  SELECT MAX(JOB_ED.EFFDT) 
                                                                   FROM PS_JOB JOB_ED 
                                                                   WHERE     JOB.EMPLID = JOB_ED.EMPLID 
                                                                         AND JOB.EMPL_RCD = JOB_ED.EMPL_RCD 
                                                                         AND JOB_ED.EFFDT <= TL_PAYABLE_TIME.DUR ) 



Regarding the execution plan, this join multiplies the row count from 16M up to 41M and only then uses a FILTER to throw data away...

And a lot of dependent columns, other joins, and inline selects hang off that join...
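
One possible reshaping, as a rough sketch only (not tested here), is to let MAX ... KEEP (DENSE_RANK LAST) pick the values from the latest PS_JOB row per payable-time row inside a GROUP BY, instead of probing the correlated MAX(EFFDT) subquery for every joined row:

SELECT PT.EMPLID
     , PT.EMPL_RCD
     , PT.DUR
     , PT.SEQ_NBR
       -- for each payable-time row, keep the PS_JOB values from the row
       -- with the highest EFFDT that is still <= PT.DUR
     , MAX(J.DEPTID)  KEEP (DENSE_RANK LAST ORDER BY J.EFFDT) AS DEPTID
     , MAX(J.JOBCODE) KEEP (DENSE_RANK LAST ORDER BY J.EFFDT) AS JOBCODE
FROM   PS_TL_PAYABLE_TIME PT
JOIN   PS_JOB J
  ON   J.EMPLID   = PT.EMPLID
  AND  J.EMPL_RCD = PT.EMPL_RCD
  AND  J.EFFDT   <= PT.DUR
WHERE  PT.DUR >= TO_DATE('2008-01-01', 'YYYY-MM-DD')
GROUP BY PT.EMPLID, PT.EMPL_RCD, PT.DUR, PT.SEQ_NBR;

The join still produces the multiplied intermediate set, but the 'latest row' selection is handled in the aggregation instead of by a separate correlated probe against PS_JOB.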

Re: Need to improve query performance [message #350574 is a reply to message #350496] Thu, 25 September 2008 08:16
JRowbottom
Messages: 5933
Registered: June 2006
Location: Sunny North Yorkshire, ho...
Senior Member
Are there actually any records in the table PS_JOB with an EFFDT after the TL_PAYABLE_TIME.DUR date?

If there aren't then with the right index, that subquery can be made to fly.
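
Purely as a sketch of what 'the right index' would look like (check first whether one of the delivered PS_JOB indexes already covers these columns before adding anything):

-- Candidate index so the MAX(EFFDT) probe can be answered from the index
-- alone; assumes no existing index already covers EMPLID, EMPL_RCD, EFFDT.
CREATE INDEX PS_JOB_EMPL_EFFDT_IX
    ON PS_JOB (EMPLID, EMPL_RCD, EFFDT);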


Oddly enough, I did run timings on the analytic queries.
From the trace files there is nothing to choose between them - the same time to within 3%, and the same number of consistent reads:
select *
     from (
     select t7.col_1
           ,t7.col_2
           ,(select first_value(t8.col_4) over (order by t8.col_3 desc)
             from   test_0108 t8
             where  t8.col_2 = t7.col_2
             and    t8.col_3  <= t7.col_3) col_4
     from test_0107 t7)
     where rownum > 1

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch        1      1.15       1.12       1167      12148          0           0
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        3      1.15       1.13       1167      12148          0           0


select *
     from (
     select t7.col_1
           ,t7.col_2
           ,(select t8.col_4
             from   test_0108 t8
             where  t8.col_2 = t7.col_2
             and    t8.col_3 = (select max(t81.col_3)
                                from   test_0108 t81
                                where  t7.col_2 = t81.col_2
                                and    t81.col_3 <= t7.col_3)) col_4
     from test_0107 t7)
     where rownum > 1

call     count       cpu    elapsed       disk      query    current        rows
------- ------  -------- ---------- ---------- ---------- ----------  ----------
Parse        1      0.00       0.00          0          0          0           0
Execute      1      0.00       0.00          0          0          0           0
Fetch        1      1.18       1.15          0      12148          0           0
------- ------  -------- ---------- ---------- ---------- ----------  ----------
total        3      1.18       1.16          0      12148          0           0