Apriori Algorithm in pl/sql
********************************************************************************
APRIORI ALGORITHM
********************************************************************************
Modified March 08 2019
********************************************************************************
This script is for educational purpose only.
I created the table in Oracle 12C DB, used sql and pl/sql to find the frequent patterns.
This script works well with smaller datasets.
When dealing with large/voluminous datasets(GB of data), a different approach is to be implemented,
to handle voluminous data efficiently.
When handling data from different sources/formats, the script need to be
modified accordingly to handle data as may be necessary.
Readers discretion/ingenuity is advised, in making necessary changes/modifications as may be required for thier specific use.
********************************************************************************
"An idle man's brain is a devil's workshop".
Why can't I convert my idle brain into a data mining workshop.
My endeavour is to explore data mining algorithms in sql & pl/sql.
I am not running Oracle's datamining tool.
I generated these scripts on Oracle 11G.
My passion is for complex number crunching algorithms using shell or pl/sql or python languages.
export ORACLE_SID=orcl
export ORACLE_HOME=c:/app/oracle/product/12.2.0/dbhome_1
$ORACLE_HOME/bin/sqlplus " / as sysdba"
alter session set container=orclpdb;
--------------------------------------------------------------------------------
connect veeksha/saketh
drop table apriori;
create table apriori(items varchar2(50));
truncate table apriori;
insert into apriori values('apples,chacolates,donuts');
insert into apriori values('bananas,chacolates,eggs');
insert into apriori values('apples,bananas,chacolates,eggs');
insert into apriori values('bananas,eggs');
commit;
select items from apriori;
ITEMS
--------------------------------------------------------------------------------
apples,chacolates,donuts
bananas,chacolates,eggs
apples,bananas,chacolates,eggs
bananas,eggs
Based on the table apriori, I created a view as follows:
create or replace view apriori_vw as
select distinct trim(name) name from (
select
regexp_substr(items,'[^,]+', 1, level) as name,
rownum as nth
from apriori
connect by regexp_substr(items, '[^,]+', 1, level) is not null) order by 1;
--IMPLEMENTATION USING NESTED LOOPS
--------------------------------------------------------------------------------
Modified March 08 2019
set serverout on size 1000000 timing on
declare
v_sup number:=2; --MINIMUM SUPPORT FOR IMPLEMENTING APRIORI ALGORITHM
v_cnt varchar2(200);
v_num number;
v_str varchar2(2000);
begin
dbms_output.put_line('NUM_OCCURANCES NUM_ITEMS ITEMS');
for c1 in (select name from apriori_vw order by name) loop
begin
v_cnt:=c1.name;
v_str:='select nvl(count(*),0) from apriori where items like '''||'%'||c1.name||'%'||''' ';
execute immediate v_str into v_num;
if (v_num >= v_sup) then
dbms_output.put_line(rpad(v_num,14)||' '||rpad((regexp_count(v_cnt,',')+1),10)||' '||v_cnt);
for c2 in (select name from apriori_vw where name>c1.name order by 1) loop
begin
execute immediate v_str||' and items like '''||'%'||c2.name||'%'||''' ' into v_num;
if (v_num >= v_sup) then
v_str:=v_str||' and items like '''||'%'||c2.name||'%'||''' ';
v_cnt:=v_cnt||','||c2.name;
dbms_output.put_line(rpad(v_num,14)||' '||rpad((regexp_count(v_cnt,',')+1),10)||' '||v_cnt);
end if;
execute immediate 'select nvl(count(*),0) from apriori where items like '''||'%'||c1.name||'%'||
''' and items like '''||'%'||c2.name||'%'||''' ' into v_num;
if (v_num >= v_sup and c1.name||','||c2.name <> v_cnt) then
dbms_output.put_line(rpad(v_num,14)||' '||rpad(2,10)||' '||c1.name||','||c2.name);
--null;
end if;
exception
when others then
null;
end;
end loop;
end if;
exception
when others then
null;
end;
end loop;
end;
/
NUM_OCCURANCES NUM_ITEMS ITEMS
2 1 apples
2 2 apples,chacolates
3 1 bananas
2 2 bananas,chacolates
2 3 bananas,chacolates,eggs
3 2 bananas,eggs
3 1 chacolates
2 2 chacolates,eggs
3 1 eggs
PL/SQL procedure successfully completed.
--IMPLEMENTATION USING CONDITIONAL LOGIC
--------------------------------------------------------------------------------
Modified March 08 2019
connect veeksha/saketh
set serverout on size 1000000 timing on
declare
v_min varchar2(20);
v_max varchar2(20);
v_wrd varchar2(20);
v_cnt varchar2(2000);
--select items from apriori;
v_sup number:=2; --MINIMUM SUPPORT FOR IMPLEMENTING APRIORI ALGORITHM
v_num number;
v_trn number;
v_str varchar2(2000);
begin
dbms_output.put_line('NUM_OCCURANCES NUM_ITEMS ITEMS');
execute immediate 'select max(name) from apriori_vw' into v_max;
execute immediate 'select count(*) from apriori' into v_trn;
for c1 in (select name from apriori_vw order by name) loop
v_wrd:=c1.name;
v_str:='select nvl(count(*),0) from apriori where items like '''||'%'||c1.name||'%'||''' ';
execute immediate v_str into v_num;
if (v_num>=v_sup) then
v_cnt:=c1.name;
dbms_output.put_line(rpad(v_num,14)||' '||rpad((regexp_count(v_cnt,',')+1),10)||' '||v_cnt);
<
execute immediate 'select nvl(min(name),'''||'zzz'||''') from apriori_vw where name>'''||v_wrd||''' ' into v_min;
if (v_min<>'zzz') then
v_wrd:=v_min;
execute immediate v_str||' and items like '''||'%'||v_min||'%'||''' ' into v_num;
if (v_num>=v_sup) then
v_cnt:=v_cnt||','||v_min;
v_str:=v_str||' and items like '''||'%'||v_min||'%'||''' ';
dbms_output.put_line(rpad(v_num,14)||' '||rpad((regexp_count(v_cnt,',')+1),10)||' '||v_cnt);
end if;
execute immediate 'select nvl(count(*),0) from apriori where items like '''||'%'||c1.name||'%'||
''' and items like '''||'%'||v_wrd||'%'||''' ' into v_num;
if (v_num>=v_sup and v_cnt<>c1.name||','||v_wrd) then
dbms_output.put_line(rpad(v_num,14)||' '||rpad(2,10)||' '||c1.name||','||v_wrd);
end if;
goto BEGIN_LOOP;
end if;
end if;
end loop;
end;
/
NUM_OCCURANCES NUM_ITEMS ITEMS
2 1 apples
2 2 apples,chacolates
3 1 bananas
2 2 bananas,chacolates
2 3 bananas,chacolates,eggs
3 2 bananas,eggs
3 1 chacolates
2 2 chacolates,eggs
3 1 eggs
PL/SQL procedure successfully completed.
There are few other methods to produce the required pattern results, the cusious readers may explore.
Happy scripting.
REFERENCES:
https://www.codeproject.com/Articles/70371/Apriori-Algorithm
http://www.vldb.org/conf/2004/DEMP2.PDF --COMPUTING FREQUENT DATA SETS IN ORACLE
https://community.oracle.com/thread/2131403?start=0&tstart=0 --Converting delimited string into table columns
https://www.oracle.com/technetwork/database/enterprise-edition/odm-techniques-algorithms-097163.html
https://www.oracle.com/technetwork/database/options/advanced-analytics/odm/overview/index.html
- jp_vijaykumar's blog
- Log in to post comments