sql - 将可以包含多个名称且没有分隔符的名称列拆分为人员 1 和人员 2
问题描述
如何将可以包含 2 个名称的字符串拆分为 Person1 和 Person2 ?名称之间没有分隔符,每行并不总是有第二个人,也不一定是第一人称或第二人称的中间名首字母/名字,只有有时第二个名字会用“AND”分隔名字的例子是如下
- 简·米德尔顿 约翰·米德尔顿
- 苏弗拉卡罗鲍勃弗拉卡罗
- 托尼法语
- 约翰·爱德华多·奥乔亚和简·阿德里安娜·奥乔亚
- 托尼·约翰·卡彭特 托尼娅·卡彭特
所需的输出设计
- 人 1 名
- 人 1 中间名
- 人 1 姓
- 人 2 名
- 人 2 中间名
- 人 2 姓
解决方案
创建一个函数,用于从某个表列中拆分有问题的字符串值:
CREATE FUNCTION [dbo].[Fn_Splittemp]
(
@text VARCHAR(8000)
, @delimiter VARCHAR(20) = ' '
)
RETURNS @String TABLE
(
Position INT IDENTITY PRIMARY KEY
, StringValue VARCHAR(8000)
)
AS
BEGIN
DECLARE @index INT
SET @index = -1
WHILE (LEN(@text) >0)
BEGIN
SET @index = CHARINDEX(@delimiter, @text)
IF (@index = 0) AND (LEN(@text) > 0)
BEGIN
INSERT INTO @string VALUES (@text)
BREAK
END
IF (@index > 1)
BEGIN
INSERT INTO @String VALUES (LEFT(@text, (@index-1)))
SET @text = RIGHT(@text, (LEN(@text)-@index))
END
ELSE
SET @text = RIGHT(@text, (LEN(@text)-@index))
END
RETURN
END
GO
拆分、清理和分配给各自的 name_columns:
CREATE TABLE #t0 (rid INT IDENTITY, rawnames VARCHAR(8000));
GO
INSERT INTO #t0 VALUES ('JANE MIDDLETON John MIDDLETON'),
('SUE FRACARO BOB FRACARO'),
('TONY FRENCH'),
('JOHN EDUARDO OCHOA AND JANE ADRIANA OCHOA'),
('TONY JOHN CARPENTER TONYA CARPENTER');
GO
SELECT n.rid, n.rawnames, fn.StringValue AS Names,
COUNT(*) OVER(PARTITION BY rawnames) AS wordcount,
ROW_NUMBER() OVER(PARTITION BY fn.stringvalue,rawnames ORDER BY fn.stringvalue) AS LastNameids,
fn.Position
INTO #t1
FROM #t0 n
cross apply dbo.Fn_Splittemp(n.rawnames, ' ') AS fn
GO
SELECT rid, rawnames, Position AS Pid,
PersonName, LastName INTO #t2
FROM
(SELECT t.rid, t.rawnames, t.names AS Lastname, LTRIM(RTRIM(REPLACE(f.StringValue,'and',''))) AS PersonName, f.Position
FROM
(SELECT replace(sqa.rawnames,sqa.Names,sqa.Names+',') AS delimstr , sqa.*
FROM #t1 sqa
WHERE wordcount<=3 AND position = (SELECT MAX(position) from #t1 crq where crq.rid = sqa.rid)
)t
cross apply dbo.Fn_Splittemp(delimstr,',') f
UNION ALL
SELECT b.rid, b.rawnames, b.Names AS Lastname, LTRIM(RTRIM(REPLACE(f.StringValue,'and',''))) AS PersonName, f.Position
FROM
(SELECT replace(rawnames,names,names+',') AS delimstr, *
FROM #t1
WHERE wordcount>3 AND LastNameids>1)b
cross apply dbo.Fn_Splittemp(delimstr,',') f
)sqt
GO
SELECT * INTO #t3 FROM #t2 cross apply dbo.Fn_Splittemp(personname, ' ');
GO
SELECT t.rawnames, fina.Firstname, fina.MiddleName, fina.LastName
FROM #t0 t
JOIN (
SELECT rid, pid, [1] AS Firstname, NULL AS MiddleName, [2] AS LastName
FROM
(SELECT * FROM (SELECT rid, pid, position, stringvalue,
COUNT(*) OVER(PARTITION BY rid, pid) AS cnt FROM #t3)a
WHERE a.cnt <=2)apiv
PIVOT
(MAX(stringvalue)
FOR position IN ([1],[2])
)piva
UNION ALL
SELECT rid, pid, [1] AS Firstname, [2] AS MiddleName, [3] AS LastName
FROM
(SELECT * FROM (SELECT rid, pid, position, stringvalue,
COUNT(*) OVER(PARTITION BY rid, pid) AS cnt FROM #t3)a
WHERE a.cnt >2)apiv
PIVOT
(MAX(stringvalue)
FOR position IN ([1],[2],[3])
)piva
)fina
ON fina.rid = t.rid;
推荐阅读
- php - Trying to center contents via PHP
- javascript - Cloud Functions - View all properties of data() object?
- node.js - spotify-web-api-node WebapiError
- java - PKCS12 类型的 Java 8 KeyStore 为 getCertificate() 返回 null
- java - Java 1.8.0_162、Drools 6.5.0 和多决策表(电子表格)
- vim - 如何使用 vi 或 vim 获取字符数
- visual-studio-code - 如何在 Visual Studio 代码中显示完整的长行(自动换行)
- angularjs - 指令不识别传入的变量
- h2 - 蟑螂写性能
- arduino - 在 RFID 输出类型之间转换?