Friday, October 10, 2008

Oracle Useful SQL Scripts

1. String Concatenation Below are the scripts that will convert multiple records into single record. e.g. concatenate all the employee name based on their deptno separated by ',' as shown below:

DEPTNO AGG_ENAME 30 ALLEN,BLAKE,JAMES,MARTIN,TURNER,WARD 20 ADAMS,FORD,JONES,SCOTT,SMITH 10 CLARK,KING,MILLER
Method 1. with data as (select ename,deptno, row_number () over (partition by deptno order by ename) rn, count (*) over (partition by deptno order by ename) cnt
from emp
) select deptno, max (ltrim (sys_connect_by_path (ename, ','), ',')) agg_ename from data group by deptno start with rn = 1 connect by prior rn = rn - 1 and deptno = prior deptno ; Method 2. select deptno, ltrim(max(sys_connect_by_path(ename,',')) keep (dense_rank last order by curr),',') as employees from (select deptno,ename, row_number() over (partition by deptno order by ename) as curr, row_number() over (partition by deptno order by ename) -1 as prev from emp) group by deptno connect by prev = prior curr and deptno = prior deptno start with curr = 1; Method 3. create or replace function get_stringagg(p_id in number) return varchar2 as l_str varchar2(2000) default null; l_sep varchar2(1) default null; begin for i in (select text from example2 where id=p_id) loop l_str:= l_strl_sepi.text; l_sep:= ','; end loop; return l_str; end;

2. Spliting String
Reverse of above can be achieved as mentioned below.
First create a test table emp1 to store the above result as below:
create table emp1 as with data as (select ename,deptno, row_number () over (partition by deptno order by ename) rn, count (*) over (partition by deptno order by ename) cnt from emp
) select deptno, max (ltrim (sys_connect_by_path (ename, ','), ',')) agg_ename from data group by deptno start with rn = 1 connect by prior rn = rn - 1 and deptno = prior deptno ;
Then query emp1 as:
select deptno, ename from ( select deptno, trim(substr(str, instr(str, ',', 1, level) + 1,instr(str, ',', 1, level + 1) - instr(str, ',', 1, level) - 1)) ename, level lv, lag(level, 1, 0) over (partition by deptno order by level) lg from (select deptno, ','agg_ename',' str from emp1) connect by instr(str, ',', 1, level) > 0 ) where ename is not null and lv != lg;
3. Count Bad Characters in a String
Below query will give non-zero value if bad characters are there else 0. SELECT COALESCE (LENGTH (TRANSLATE ('St Louis', '.' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' '1234567890' ' ', '.' ) ), 0 ) "# bad characters" FROM DUAL;

4. Convert Alphanumeric string to Numeric
SELECT REPLACE( TRANSLATE('1234HELL01233LLL' , 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', ' ') ,' ' ,'') FROM DUAL;

5. Pull out digits from a string
select translate('..Af$34@#(}fj.s2347adfAdf%','.' || translate('..Af$34@#(}fj.s2347adfAdf%','.0123456789','.'),'.') from dual;

6. Create Flatten Hierarchy from Parent-Child Data Set

Suppose we have table with parent child relationship and we want to create flatten hierarchy till level12

Table:

pr ch
======  =======
A B
B C
B D
C E
D M
E F
F G

and we want to see data as

LEVEL0 LEVEL1 LEVEL2 LEVEL3 LEVEL4 LEVEL5 ...
A B C E F G
B D M


Method 1:
 select lv,
        level0,
        coalesce(level1,level0) level1,
        coalesce(level2,level1) level2,
        coalesce(level3,level2,level1) level3 ,
        coalesce(level4,level3,level2,level1) level4,
        coalesce(level5,level4,level3,level2,level1) level5,
        coalesce(level6,level5,level4,level3,level2,level1) level6,
        coalesce(level7,level6,level5,level4,level3,level2,level1) level7,
        coalesce(level8,level7,level6,level5,level4,level3,level2,level1) level8 , 
        coalesce(level9,level8,level7,level6,level5,level4,level3,level2,level1) level9, 
        coalesce(level10,level9,level8,level7,level6,level5,level4,level3,level2,level1) level10, 
        coalesce(level11,level10,level9,level8,level7,level6,level5,level4,level3,level2,level1) level11, 
        coalesce(level12,level11,level10,level9,level8,level7,level6,level5,level4,level3,level2,level1) level12 ,
        hr
from        
(
select lv,
       rn as level0,
       min(decode(lv,1,ch,substr(hr,(instr(hr,'\',1,2))+1,((instr(hr,'\',1,3) -instr(hr,'\',1,2))-1)) )) level1,
       min(decode(lv,2,ch,substr(hr,(instr(hr,'\',1,3))+1,((instr(hr,'\',1,4) -instr(hr,'\',1,3))-1)) )) level2,
       min(decode(lv,3,ch,substr(hr,(instr(hr,'\',1,4))+1,((instr(hr,'\',1,5) -instr(hr,'\',1,4))-1)) )) level3,
       min(decode(lv,4,ch,substr(hr,(instr(hr,'\',1,5))+1,((instr(hr,'\',1,6) -instr(hr,'\',1,5))-1)) )) level4,
       min(decode(lv,5,ch,substr(hr,(instr(hr,'\',1,6))+1,((instr(hr,'\',1,7) -instr(hr,'\',1,6))-1)) )) level5,
       min(decode(lv,6,ch,substr(hr,(instr(hr,'\',1,7))+1,((instr(hr,'\',1,8) -instr(hr,'\',1,7))-1)) )) level6,
       min(decode(lv,7,ch,substr(hr,(instr(hr,'\',1,8))+1,((instr(hr,'\',1,9) -instr(hr,'\',1,8))-1)) )) level7,
       min(decode(lv,8,ch,substr(hr,(instr(hr,'\',1,9))+1,((instr(hr,'\',1,10) -instr(hr,'\',1,9))-1)) )) level8,
       min(decode(lv,9,ch,substr(hr,(instr(hr,'\',1,10))+1,((instr(hr,'\',1,11) -instr(hr,'\',1,10))-1)) )) level9,
       min(decode(lv,10,ch,substr(hr,(instr(hr,'\',1,11))+1,((instr(hr,'\',1,12) -instr(hr,'\',1,11))-1)) )) level10,
       min(decode(lv,11,ch,substr(hr,(instr(hr,'\',1,12))+1,((instr(hr,'\',1,13) -instr(hr,'\',1,12))-1)) )) level11,
       min(decode(lv,12,ch,null )) level12,       
       hr    
from                               
(
select pr,ch,
       connect_by_root pr as rn,
       level as lv,
       sys_connect_by_path(pr,'\')||'\'||ch as hr
  from hier
 start with pr = 'Departments'
connect by  prior ch = pr  
) group by hr,lv,rn
);

Method 2:

select distinct coalesce(l.ch,k.ch,j.ch,i.ch,h.ch,g.ch,f.ch,e.ch,d.ch,c.ch,b.ch,a.ch) as id
,      a.pr
,      a.ch lv1, a.al lv1_al
,      b.ch lv2, b.al lv2_al
,      c.ch lv3, c.al lv3_al
,      d.ch lv4, d.al lv4_al
,      e.ch lv5, e.al lv5_al
,      f.ch lv6, f.al lv6_al
,      g.ch lv7, g.al lv7_al
,      h.ch lv8, h.al lv8_al
,      i.ch lv9, i.al lv9_al  
,      j.ch lv10, j.al lv10_al  
,      k.ch lv11, k.al lv11_al  
,      l.ch lv12, l.al lv12_al  
from hier a
,    hier b
,    hier c
,    hier d
,    hier e
,    hier f
,    hier g
,    hier h
,    hier i
,    hier j
,    hier k
,    hier l
where a.ch = b.pr(+)
and b.ch = c.pr(+)
and c.ch = d.pr(+)
and d.ch = e.pr(+)
and e.ch = f.pr(+)
and f.ch = g.pr(+)
and g.ch = h.pr(+)
and h.ch = i.pr(+)
and i.ch = j.pr(+)
and j.ch = k.pr(+)

and k.ch = l.pr(+)


Saturday, October 4, 2008

Oracle SQL Tuning TIPS

In this blog I am going to write about some basic rules and tips that can enhance query performance by reducing parsing,execution, or both. These rules are simple but may yield lots of benefits.

TIP1.

1. Use SQL standards within an application. Below are the simple rules that are easy to implement and allow more sharing within Oracle’s memory.
a. Using a single case for all SQL verbs
b. Beginning all SQL verbs on a new line
c. Right or left aligning verbs within the initial SQL verb
d. Separating all words with a single space

2. Use bind variables where ever possible.
3. Use a standard approach to table aliases. If two identical SQL statements vary because an identical table has two different aliases, then the SQL is different and will not be shared.
4. Use table aliases and prefix all column names by their aliases when more than one table is involved in a query. This reduces parse time AND prevents future syntax errors if someone adds a column to one of the tables with the same name as a column in another table. (ORA-00918: COLUMN AMBIGUOUSLY DEFINED)

TIP2.

Don’t perform operation on database objects referenced in the WHERE clause. Oracle will ignore the indexes defined on columns. Just for reference see below DO NOT USE/ USE section:

DO NOT USE
SELECT account_name, trans_date, amount
FROM transaction
WHERE SUBSTR(account_name,1,7) = 'CAPITAL';

SELECT account_name, trans_date, amount
FROM transaction
WHERE account_name = NVL ( :acc_name, account_name);

SELECT account_name, trans_date, amount
FROM transaction
WHERE TRUNC (trans_date) = TRUNC (SYSDATE);

SELECT account_name, trans_date, amount
FROM transaction
WHERE account_name account_type = 'AMEXA';

SELECT account_name, trans_date, amount
FROM transaction
WHERE amount + 3000 < not ="">


Instead use the below one:
USE
SELECT account_name, trans_date, amount
FROM transaction
WHERE account_name LIKE 'CAPITAL%';

SELECT account_name, trans_date, amount
FROM transaction
WHERE account_name LIKE NVL ( :acc_name, '%');

SELECT account_name, trans_date, amount
FROM transaction
WHERE trans_date BETWEEN TRUNC (SYSDATE) AND TRUNC (SYSDATE) + .99999;

SELECT account_name, trans_date, amount
FROM transaction
WHERE account_name = 'AMEX' AND account_type = 'A';

SELECT account_name, trans_date, amount
FROM transaction
WHERE amount <> 0;

SELECT account_name, trans_date, amount
FROM transaction
WHERE amount > 0;

TIP3.

Try not to use HAVING clause in the select statements. HAVING clause will filter records only after fetching all rows. Using WHERE clause helps reducing overhead in sorting, summing etc.HAVING clauses should only be used when columns with summary operations applied to them are restricted by the clause. For reference see below example.

DO NOT USE
SELECT region, AVG (loc_size)
FROM location
GROUP BY region
HAVING region != 'SYDNEY'
AND region != 'PERTH';

USE
SELECT region, AVG (loc_size)
FROM location
WHERE region != 'SYDNEY'
AND region != 'PERTH';
GROUP BY region;


TIP4.

Minimize the number of table lookups (subquery blocks) in queries, particularly if statements include subquery SELECTs or multicolumn UPDATEs. Avoid using subqueries when a JOIN will do the job.

Separate Subqueries
SELECT emp_name
FROM emp
WHERE emp_cat = (SELECT MAX (category) FROM emp_categories)
AND emp_range = (SELECT MAX (sal_range)FROM emp_categories)
AND emp_dept = 0020;

Combined Subqueries
SELECT emp_name
FROM emp
WHERE (emp_cat, sal_range) =
(SELECT MAX (category), MAX (sal_range)
FROM emp_categories) AND emp_dept = 0020;

TIP5.

Consider the alternatives EXISTS, IN and table joins when doing multiple table joins. None of these are consistently faster; it depends on the volume of data.
If the outer query is "big" and the inner query is "small", "IN" is generally more efficient. e.g.

select count(subobject_name)
from big
where object_id in ( select object_id from small );

versus:
select count(subobject_name)
from big
where exists ( select null from small where small.object_id = big.object_id );

If the outer query is "small" and the inner query is "big" "WHERE EXISTS" can be quite efficient. e.g.

select count(subobject_name)
from small
where object_id in ( select object_id from big );

versus:
select count(subobject_name)
from small
where exists ( select null from big where small.object_id = big.object_id );

TIP6.

Avoid joins that require the DISTINCT qualifier on the SELECT list in queries which are used to determine information at the owner end of a one-to-many relationship. The DISTINCT operator causes Oracle to fetch all rows satisfying the table join and then sort and filter out duplicate values. EXISTS is a faster alternative, because the Oracle optimizer realizes when the subquery has been satisfied once, there is no need to proceed further and the next matching row can be fetched.

DO NOT USE
SELECT DISTINCT dept_no, dept_name
FROM dept d, emp e
WHERE d.dept_no = e.dept_no;

USE
SELECT dept_no, dept_name
FROM dept d
WHERE EXISTS (SELECT 'X'
FROM emp e
WHERE e.dept_no = d.dept_no);

TIP7.

If possible use UNION ALL instead of UNION. The UNION clause forces all rows returned by each portion of the UNION to be sorted and merged and duplicates to be filtered before the first row is returned. A UNION ALL simply returns all rows including duplicates and does not have to perform any sort, merge or filter. If your tables are mutually exclusive (include no duplicate records), or you don't care if duplicates are returned, the UNION ALL is much more efficient.

TIP8.

Consider using DECODE to avoid having to scan the same rows repetitively or join the same table repetitively.

DO NOT USE
SELECT COUNT(*)
FROM emp
WHERE status = 'Y'
AND emp_name LIKE 'SMITH%';
--
SELECT COUNT(*)
FROM emp
WHERE status = 'N'
AND emp_name LIKE 'SMITH%';


USE
SELECT COUNT(DECODE(status, 'Y', 'X', NULL)) Y_count,
COUNT(DECODE(status, 'N', 'X', NULL)) N_count
FROM emp
WHERE emp_name LIKE 'SMITH%';

TIP9.

If query returns more than 20 percent of the rows in the table, use a full-table scan rather than an index scan.

TIP10.

Never mix data types in Oracle queries, as it will invalidate the index. If the column is numeric, remember not to use quotes (e.g., salary = 50000). For char index columns, always use single quotes (e.g., name = 'NAME').

TIP11.

To turn off an index (only with a cost-based optimizer), concatenate a null string to the index column name (e.g., name') or add zero to a numeric column name (e.g., salary+0). With the rule-based optimizer, this allows manually choose the most selective index to service the query.

TIP12.

Whenever possible, use the UNION statement instead of OR conditions.

TIP13.

Rewrite complex subqueries with temporary tables - Oracle created the global temporary table (GTT) and the SQL WITH operator to help divide-and-conquer complex SQL sub-queries (especially those with with WHERE clause subqueries, SELECT clause scalar subqueries and FROM clause in-line views). Tuning SQL with temporary tables (and materializations in the WITH clause) can result in amazing performance improvements.

TIP14.

Use minus instead of EXISTS subqueries - Using the minus operator instead of NOT IN and NOT EXISTS will result in a faster execution plan.

TIP15.

Use SQL analytic functions - The Oracle analytic functions can do multiple aggregations (e.g. rollup by cube) with a single pass through the tables, making them very fast for reporting SQL.

TIP16.

Re-write NOT IN and NOT EXISTS subqueries as outer joins - In many cases of NOT queries (but ONLY where a column is defined as NULL), we can re-write the uncorrelated subqueries into outer joins with IS NULL tests. Note that this is a non-correlated sub-query, but it could be re-written as an outer join. e.g.

SELECT book_key
FROM book
WHERE book_key NOT IN (SELECT book_key FROM sales);
-
SELECT book_key
FROM book
WHERE NOT EXISTS (SELECT book_key FROM sales);

Above two can be re-written as
SELECT b.book_key
FROM book b, sales s
WHERE b.book_key = s.book_key(+) AND s.book_key IS NULL;

TIP17.

Index NULL values - If we have SQL that frequently tests for NULL, consider creating an index on NULL values. To get around the optimization of SQL queries that choose NULL column values (i.e. where emp_name IS NULL), we can create a function-based index using the null value built-in SQL function to index only on the NULL columns. e.g.

--create an FBI on ename column with NULL values
create index emp_null_ename_idx on emp (nvl(ename,'null'));

analyze index emp_null_ename_idx compute statistics;

Same techniques with NULL numeric values. This syntax replaces NULL values with a zero:

--create an FBI on emp_nbr column with NULL values
create index emp_null_emp_nbr_idx on emp (nvl(ename,o));

analyze index emp_null_ename_idx compute statistics;

Now we can use the index and greatly improve the speed of any queries that require access to the NULL columns. Note that we must make one of two changes:

1- Add a hint to force the index
2- Change the WHERE predicate to match the function

Here is an example of using an index on NULL column values:

-- test the index access (change predicate to use FBI)
select /*+ index(emp_null_ename_idx) */ename
from emp e
where nvl(ename,'null') = 'null';

TIP18.

Achieve faster SQL performance with dbms_stats. To choose the best execution plan for a SQL query, oracle relies on information about the tables and indexes in the query. Execution plan includes, which index to use to retrieve table row, what order in which to join multiple tables together, and which internal join methods to use (Oracle has nested loop joins, hash joins, star joins, and sort merge join methods). These execution plans are computed by the Oracle cost-based SQL optimizer commonly known as the CBO.
Use dbms_stats utility to estimate the stats.

Syntax:

exec dbms_stats.gather_database_stats;
exec dbms_stats.gather_database_stats(estimate_percent => 15);

exec dbms_stats.gather_schema_stats('SCOTT');
exec dbms_stats.gather_schema_stats('SCOTT', estimate_percent => 15);

exec dbms_stats.gather_table_stats('SCOTT', 'EMPLOYEES');
exec dbms_stats.gather_table_stats('SCOTT', 'EMPLOYEES', estimate_percent => 15);

exec dbms_stats.gather_index_stats('SCOTT', 'EMPLOYEES_PK');
exec dbms_stats.gather_index_stats('SCOTT', 'EMPLOYEES_PK', estimate_percent => 15);

exec dbms_stats.gather_schema_stats( -
ownname => 'SCOTT', -
options => 'GATHER AUTO', -
estimate_percent => dbms_stats.auto_sample_size, -
method_opt => 'for all columns size repeat', -
cascade => true, -
degree => 15 -
);