sql - 从 oracle 中的嵌套目录创建外部表
问题描述
如何从嵌套目录创建外部表(目录名称将是列值)一个目录可能有多个子目录下面给出一个简单的例子
国家→印度→卡纳塔克邦→班加罗尔→….data.txt
国家→印度→马哈拉施特拉邦→孟买→..data1.txt
预期表
Country | India | Karnataka | Bengaluru |…content of data.txt
Country | India | Maharashtra| Mumbai|…content of data1.txt
解决方案
这是我在一个项目中遇到的一个有趣的问题。因此,我将基本上展示我所做的以及您需要的所有组件。
成分
- 将外部数据填充到 csv 文件中的 Shell 脚本
- 数据库对象(表外部和最终表)
- 用于处理文件外部内容的 Clob 函数和 Blob 函数
- 最终目录中的动态文件数意味着每个文件一行
设想
假设我有以下结构
ls -R | grep ":$" | sed -e 's/:$//' -e 's/[^-][^\/]*\//--/g' -e 's/^/ /' -e 's/-/|/'
.
|-India
|---Karnataka
|-----Bengaluru
|-Spain
|---Catalunya
|-----Barcelona
$ ls -R
.:
India Spain
./India:
Karnataka
./India/Karnataka:
Bengaluru
./India/Karnataka/Bengaluru:
data.txt
./Spain:
Catalunya
./Spain/Catalunya:
Barcelona
./Spain/Catalunya/Barcelona:
data1.txt data2.txt
按照您的逻辑,我需要一个外部表,如下所示
Country|India|Karnataka|Bengaluru|Filename|content of data.txt
Country|Spain|Catalonia|Barcelona|Filename|content of data1.txt
Country|Spain|Catalonia|Barcelona|Filename|content of data2.txt
文件内容
pwd
/test/India/Karnataka/Bengaluru
$ cat data.txt
This is an example of text for this city
$ cd ../../../Spain/Catalunya/Barcelona/
$ cat data1.txt
Another example for Catalonia and Barcelona
$ cat data2.txt
Second example for Catalonia and Barcelona
所以,我们的外部表将是
CREATE TABLE EXTERNAL_DATA (
TYPE VARCHAR2(4000),
NAME VARCHAR2(4000),
REGION VARCHAR2(4000),
CITY VARCHAR2(4000),
FILENAME VARCHAR2(4000)
)
ORGANIZATION EXTERNAL (
TYPE ORACLE_LOADER
DEFAULT DIRECTORY dir_util_db
ACCESS PARAMETERS (
RECORDS DELIMITED BY newline
FIELD NAMES ALL FILES IGNORE
BADFILE dir_util_db:'ext_tab_inp.bad'
DISCARDFILE dir_util_db:'ext_tab_inp.dsc'
LOGFILE dir_util_db:'ext_tab_inp.log'
FIELDS TERMINATED BY '|'
NOTRIM
MISSING FIELD VALUES ARE NULL
REJECT ROWS WITH ALL NULL
FIELDS
(
TYPE CHAR(4000),
NAME CHAR(4000),
REGION CHAR(4000),
CITY CHAR(4000),
FILENAME CHAR(4000)
)
)
LOCATION ('ext_data_test.csv')
)
REJECT LIMIT UNLIMITED;
我们的最终表将包括 blob 列。
CREATE TABLE FINAL_DATA (
TYPE VARCHAR2(10),
NAME VARCHAR2(20),
REGION VARCHAR2(20),
CITY VARCHAR2(20),
FILENAME VARCHAR2(20),
DATA BLOB
)
我们将需要一些函数来从文件系统中检索 blob 文件并将其内容存储为 blob 数据。我们还需要一个函数来将 blob 内容转换为 clob 文本。
一步步
外部表的记录数
$ find . -name "*.txt" | wc -l
3
填写csv文件
$ echo "TYPE|NAME|REGION|CITY|FILENAME" > ext_tab_test.csv
$ for i in $(find . -name "*.txt" -type f -print)
> do
> var=$(echo $i)
> record_type="Country"
> record_name=$(echo $var | awk -F '/' '{print $2}')
> record_region=$(echo $var | awk -F '/' '{print $3}')
> record_city=$(echo $var | awk -F '/' '{print $4}')
> record_file=$(echo $var | awk -F '/' '{print $NF}')
> echo "$record_type|$record_name|$record_region|$record_city|$record_file" >> ext_tab_test.csv
> done
$ cat ext_tab_test.csv
TYPE|NAME|REGION|CITY|FILENAME
Country|India|Karnataka|Bengaluru|data.txt
Country|Spain|Catalunya|Barcelona|data1.txt
Country|Spain|Catalunya|Barcelona|data2.txt
让我们去数据库
创建外部表所在目录
SQL> create or replace directory dir_test as '/test' ;
Directory created.
创建外部表
SQL> CREATE TABLE EXTERNAL_DATA (
TYPE VARCHAR2(4000),
NAME VARCHAR2(4000),
REGION VARCHAR2(4000),
CITY VARCHAR2(4000),
FILENAME VARCHAR2(4000)
)
ORGANIZATION EXTERNAL (
TYPE ORACLE_LOADER
DEFAULT DIRECTORY dir_test
ACCESS PARAMETERS (
RECORDS DELIMITED BY newline
FIELD NAMES ALL FILES IGNORE
BADFILE dir_test:'ext_tab_test.bad'
DISCARDFILE dir_test:'ext_tab_test.dsc'
LOGFILE dir_test:'ext_tab_test.log'
FIELDS TERMINATED BY '|'
NOTRIM
MISSING FIELD VALUES ARE NULL
REJECT ROWS WITH ALL NULL
FIELDS
(
TYPE CHAR(4000),
NAME CHAR(4000),
REGION CHAR(4000),
CITY CHAR(4000),
FILENAME CHAR(4000)
)
)
LOCATION ('ext_tab_test.csv')
)
REJECT LIMIT UNLIMITED;
Table created.
SQL> col type for a40
SQL> col name for a40
SQL> col region for a40
SQL> col city for a40
SQL> col filename for a40
SQL> set lines 200
SQL> select * from EXTERNAL_DATA
TYPE NAME REGION CITY FILENAME
---------- ---------------------------------------- ---------------------------------------- ---------------------------------------- ----------------------------------------
Country India Karnataka Bengaluru data.txt
Country Spain Catalunya Barcelona data1.txt
Country Spain Catalunya Barcelona data2.txt
现在我们要将这个外部表的内容合并到我们的最终表中。 创建决赛桌
SQL> CREATE TABLE FINAL_DATA
(
TYPE VARCHAR2(10),
NAME VARCHAR2(20),
REGION VARCHAR2(20),
CITY VARCHAR2(20),
FILENAME VARCHAR2(20),
DATA BLOB
) 2 3 4 5 6 7 8 9 ;
Table created.
将外部表中的数据插入/合并到最终表中
SQL> merge into FINAL_DATA target
using ( select * from EXTERNAL_DATA ) source
on ( target.type = source.type and
target.name = source.name and
target.region = source.region and
target.city = source.city
)
2 3 4 5 6 7 8 when not matched then
9 insert ( type, name, region, city, filename )
values
( source.type , source.name , source.region, source.city , source.filename ); 10 11
3 rows merged.
SQL> commit ;
Commit complete.
现在我们需要将 blob 对象(文件)转换为 blob 对象并将其存储到表中的过程
CREATE OR REPLACE FUNCTION os_file_to_blob(p_dir in varchar2 , p_file_name VARCHAR2) RETURN BLOB AS
dest_loc BLOB := empty_blob();
src_loc BFILE := BFILENAME(p_dir, p_file_name);
BEGIN
DBMS_LOB.OPEN(src_loc, DBMS_LOB.LOB_READONLY);
DBMS_LOB.CREATETEMPORARY(
lob_loc => dest_loc
, cache => true
, dur => dbms_lob.session
);
DBMS_LOB.OPEN(dest_loc, DBMS_LOB.LOB_READWRITE);
DBMS_LOB.LOADFROMFILE(
dest_lob => dest_loc
, src_lob => src_loc
, amount => DBMS_LOB.getLength(src_loc));
DBMS_LOB.CLOSE(dest_loc);
DBMS_LOB.CLOSE(src_loc);
RETURN dest_loc;
END os_file_to_blob;
/
以及将 blob 转换为 clob 的函数
create or replace function fil_rep.blob_to_char (b blob)
return clob is
v_clob clob;
n number;
v_start pls_integer := 1;
v_buffer pls_integer := 32767;
v_varchar varchar2(32767);
begin
if (b is null)
then
return null;
end if;
if (dbms_lob.getlength(b)=0)
then
return empty_clob();
end if;
dbms_lob.createtemporary(v_clob,true);
for i in 1..ceil(dbms_lob.getlength(b) / v_buffer)
loop
v_varchar := utl_raw.cast_to_varchar2(dbms_lob.substr(b, v_buffer, v_start));
dbms_lob.writeappend(v_clob, length(v_varchar), v_varchar);
v_start := v_start + v_buffer;
end loop;
RETURN v_clob;
end blob_to_char;
/
创建用于斑点处理的函数
SQL> CREATE OR REPLACE FUNCTION os_file_to_blob(p_dir in varchar2 , p_file_name VARCHAR2) RETURN BLOB AS
2 dest_loc BLOB := empty_blob();
3 src_loc BFILE := BFILENAME(p_dir, p_file_name);
4 BEGIN
5 DBMS_LOB.OPEN(src_loc, DBMS_LOB.LOB_READONLY);
6 DBMS_LOB.CREATETEMPORARY(
7 lob_loc => dest_loc
8 , cache => true
9 , dur => dbms_lob.session
10 );
11 DBMS_LOB.OPEN(dest_loc, DBMS_LOB.LOB_READWRITE);
12 DBMS_LOB.LOADFROMFILE(
13 dest_lob => dest_loc
14 , src_lob => src_loc
15 , amount => DBMS_LOB.getLength(src_loc));
16 DBMS_LOB.CLOSE(dest_loc);
17 DBMS_LOB.CLOSE(src_loc);
18 RETURN dest_loc;
19* END os_file_to_blob;
/
Function created.
SQL> create or replace function blob_to_char (b blob)
2 return clob is
3 v_clob clob;
4 n number;
5 v_start pls_integer := 1;
6 v_buffer pls_integer := 32767;
7 v_varchar varchar2(32767);
8 begin
9 if (b is null)
10 then
11 return null;
12 end if;
13 if (dbms_lob.getlength(b)=0)
14 then
15 return empty_clob();
16 end if;
17 dbms_lob.createtemporary(v_clob,true);
18 for i in 1..ceil(dbms_lob.getlength(b) / v_buffer)
19 loop
20 v_varchar := utl_raw.cast_to_varchar2(dbms_lob.substr(b, v_buffer, v_start));
21 dbms_lob.writeappend(v_clob, length(v_varchar), v_varchar);
22 v_start := v_start + v_buffer;
23 end loop;
24 RETURN v_clob;
25* end blob_to_char;
/
Function created.
现在,我们需要为我们拥有的每个位置创建一个目录
要创建的目录
create or replace directory dir_01 as '/test/India/Karnataka/Bengaluru' ;
create or replace directory dir_02 as '/test/Spain/Catalunya/Barcelona' ;
更新每个目录的 blob
SQL> DECLARE
v_blob BLOB;
v_file varchar2(40);
BEGIN
for h in
( select filename from final_data where data is null and name = 'India' )
loop
v_file := h.filename;
begin
v_blob := os_file_to_blob ('DIR_01' , v_file);
update final_data set data = v_blob where filename = v_file;
commit;
exception when others then null;
end;
end loop;
end;
/
PL/SQL procedure successfully completed.
SQL> DECLARE
v_blob BLOB;
v_file varchar2(40);
BEGIN
for h in
( select filename from final_data where data is null and name = 'Spain' )
loop
v_file := h.filename;
begin
v_blob := os_file_to_blob ('DIR_02' , v_file);
update final_data set data = v_blob where filename = v_file;
commit;
exception when others then null;
end;
end loop;
end;
/
PL/SQL procedure successfully completed.
最终选择
SQL> select a.filename , blob_to_char(a.data) as content from final_data a ;
FILENAME CONTENT
---------------------------------------- --------------------------------------------------------------------------------
data.txt This is an example of text for this city
data1.txt Another example for Catalonia and Barcelona
data2.txt Second example for Catalonia and Barcelona
SQL>
显然,如果我愿意,我会在表中放置一个名为 directory_path 的字段,因此我可以完全自动创建目录。
我没有公布我的所有想法,只是让你有一个想法。就我而言
- 所有过程都包含在一个 shell 脚本中
- 该过程由
dbms_scheduler
每一天触发 - 就我而言,我将不同目录的一些日志文件更新为最终表,以便某些用户可以检查它们。原因是他们无法访问服务器中的文件系统。
- 我使用
merge
语句在最终表中插入/更新,因此我只上传新文件或修改文件的内容。为此,我在 csv 文件中检索文件的时间戳。
请分享您的疑问和建议。