oracle - Oracle Connect By vs Recursive User-Defined Function 性能
问题描述
我正在使用 Connect By 和用户定义的函数进行基本性能检查,以获取父值。似乎使用用户定义的函数比 Connect By 查询执行得更好。
我想知道与 Connect By 相比,使用用户定义的函数是否应该具有更好的性能。
create table org ( pid number, cid number, type varchar2(10), name varchar2(30) );
alter table org add constraint org_pk primary key ( cid ); -- UPDATE#2
insert into org values (null,1,'MGT','OP');
insert into org values (1,2,'DEP','HR');
insert into org values (1,3,'DEP','IT');
insert into org values (3,4,'DIV','WEB');
insert into org values (3,5,'DIV','DB');
insert into org values (4,6,'SEC','HTML');
insert into org values (4,7,'SEC','JAVA');
create or replace function get_dep ( p_cid in number ) return number
is
l_pid number;
l_cid number;
l_type varchar2(30);
begin
select pid
, cid
, type
into l_pid
, l_cid
, l_type
from org
where cid = p_cid;
if ( l_type = 'MGT' ) then
return null;
elsif ( l_type = 'DEP' ) then
return l_cid;
else
return get_dep ( l_pid );
end if;
end;
/
select cid --correction
from org
where type = 'DEP'
start
with cid = 7
connect
by
prior pid = cid
and
prior type != 'DEP'
-------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 2 | 66 | 6 (17)| 00:00:01 |
|* 1 | FILTER | | | | | |
|* 2 | CONNECT BY NO FILTERING WITH START-WITH| | | | | |
| 3 | TABLE ACCESS FULL | ORG | 7 | 231 | 5 (0)| 00:00:01 |
-------------------------------------------------------------------------------------------------
select get_dep ( cid )
from org
where cid = 7;
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 1 | 13 | 5 (0)| 00:00:01 |
|* 1 | TABLE ACCESS FULL| ORG | 1 | 13 | 5 (0)| 00:00:01 |
--------------------------------------------------------------------------
更新#1: 我更新了函数以添加逻辑以在 id 为 MGT 时返回 null。
此外,更改查询以获取表中的所有记录。
select cid, ( select cid
from org
where type = 'DEP'
start
with cid = m.cid
connect
by
prior pid = cid
and
prior type != 'DEP' ) dep
from org m;
-------------------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-------------------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 7 | 91 | 10 (0)| 00:00:01 |
|* 1 | FILTER | | | | | |
|* 2 | CONNECT BY NO FILTERING WITH START-WITH| | | | | |
| 3 | TABLE ACCESS FULL | ORG | 7 | 231 | 5 (0)| 00:00:01 |
| 4 | TABLE ACCESS FULL | ORG | 7 | 91 | 5 (0)| 00:00:01 |
-------------------------------------------------------------------------------------------------
select cid, get_dep ( cid ) dep
from org;
--------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
--------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 7 | 91 | 5 (0)| 00:00:01 |
| 1 | TABLE ACCESS FULL| ORG | 7 | 91 | 5 (0)| 00:00:01 |
--------------------------------------------------------------------------
更新#2:按照建议添加索引。解释计划在两者上都有改进,但使用用户定义函数的查询仍然基于解释计划执行得更好(除非我没有正确解释计划)。
select cid, ( select cid
from org
where type = 'DEP'
start
with cid = m.cid
connect
by
prior pid = cid
and
prior type != 'DEP' ) dep
from org m;
-----------------------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
-----------------------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 7 | 91 | 4 (0)| 00:00:01 |
|* 1 | FILTER | | | | | |
|* 2 | CONNECT BY WITH FILTERING | | | | | |
| 3 | TABLE ACCESS BY INDEX ROWID | ORG | 1 | 33 | 1 (0)| 00:00:01 |
|* 4 | INDEX UNIQUE SCAN | ORG_PK | 1 | | 0 (0)| 00:00:01 |
| 5 | NESTED LOOPS | | 1 | 53 | 2 (0)| 00:00:01 |
|* 6 | CONNECT BY PUMP | | | | | |
| 7 | TABLE ACCESS BY INDEX ROWID| ORG | 1 | 33 | 1 (0)| 00:00:01 |
|* 8 | INDEX UNIQUE SCAN | ORG_PK | 1 | | 0 (0)| 00:00:01 |
| 9 | INDEX FULL SCAN | ORG_PK | 7 | 91 | 1 (0)| 00:00:01 |
-----------------------------------------------------------------------------------------
select cid, get_dep ( cid ) dep
from org;
---------------------------------------------------------------------------
| Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time |
---------------------------------------------------------------------------
| 0 | SELECT STATEMENT | | 7 | 91 | 1 (0)| 00:00:01 |
| 1 | INDEX FULL SCAN | ORG_PK | 7 | 91 | 1 (0)| 00:00:01 |
---------------------------------------------------------------------------
感谢任何反馈。
解决方案
首先,在您的示例中,SQL 和 PL/SQL 返回不同的结果。
SQL> select pid
2 from org
3 where type = 'DEP'
4 start
5 with cid = 7
6 connect
7 by
8 prior pid = cid
9 and
10 prior type != 'DEP';
PID
----------
1
SQL>
SQL> select get_dep ( cid )
2 from org
3 where cid = 7;
GET_DEP(CID)
------------
3
其次,在如此极小的数据量上比较不同的方法并没有真正的意义。
假设我们有一棵深度为 999 999 的树,并且想要找到给定节点的根。在我的示例中,只有一棵树(实际上是一个列表,因为每个父节点都有一个子节点),因此所有节点的根节点都是相同的。重要的是:给定 ID 的深度越大,执行时间越长。
create table org0 ( pid number, cid number, name varchar2(30) );
insert into org0
select rownum, rownum+1, 'name' || rpad(rownum,25,'#')
from dual
connect by rownum < 1e6;
alter table org0 add constraint org0_pk primary key ( cid );
返回根函数
create or replace function get_id(p_cid in number) return number is
l_pid number;
begin
select pid into l_pid from org0 where cid = p_cid;
return get_id(l_pid);
exception
when no_data_found then
return p_cid;
end get_id;
/
测试
SQL
SQL> select pid id
2 from org0
3 where connect_by_isleaf = 1
4 start with cid = 10000
5 connect by prior pid = cid;
ID
----------
1
Elapsed: 00:00:00.07
SQL>
SQL> select pid id
2 from org0
3 where connect_by_isleaf = 1
4 start with cid = 100000
5 connect by prior pid = cid;
ID
----------
1
Elapsed: 00:00:00.55
SQL>
SQL> select pid id
2 from org0
3 where connect_by_isleaf = 1
4 start with cid = 1000000
5 connect by prior pid = cid;
ID
----------
1
Elapsed: 00:00:05.79
PL/SQL
SQL> select get_id(10000) id from dual;
ID
----------
1
Elapsed: 00:00:00.15
SQL> select get_id(100000) id from dual;
ID
----------
1
Elapsed: 00:00:01.47
SQL> select get_id(1000000) id from dual;
ID
----------
1
Elapsed: 00:00:14.83
如您所见,PL/SQL 大约慢了 2 倍。
在某些特定情况下,PL/SQL 可能会更快(不适合您的任务)。您可以在这本书Oracle SQL Revealed的“当 PL/SQL 比 Vanilla SQL 更好”一章中阅读有关细粒度性能分析和使用 dbms_hprof 等工具的信息。
推荐阅读
- python - 将 lambda 表达式转换为简单函数
- postgresql - Google Cloud SQL PG11:无法调整共享内存段的大小
- webgl - 如何从纹理格式属性中选择 WebGL GLSL 采样器类型?
- reactjs - 简单的原子重置和验证检查未按预期工作
- javascript - 如何使正则表达式条件语句为假
- r - 在 Google Colaboratory 中加载 CSV 文件
- reactjs - React-Testing-Library - 使用 Redux 和路由器包装组件
- r - R - 安装包的开发版本时更新 rlang 时出现问题
- c++ - Qt:从 QTableView 中删除轮廓时的奇怪行为
- python - 如何创建生成列表的所有可能组合直至上限的蛮力程序?