首页 > 解决方案 > 将可以包含多个名称且没有分隔符的名称列拆分为人员 1 和人员 2

问题描述

如何将可以包含 2 个名称的字符串拆分为 Person1 和 Person2 ?名称之间没有分隔符,每行并不总是有第二个人,也不一定是第一人称或第二人称的中间名首字母/名字,只有有时第二个名字会用“AND”分​​隔名字的例子是如下

所需的输出设计

标签: sqlsql-servertsqlparsingsplit

解决方案


创建一个函数,用于从某个表列中拆分有问题的字符串值:

CREATE FUNCTION [dbo].[Fn_Splittemp]
        (
          @text VARCHAR(8000)
        , @delimiter VARCHAR(20) = ' '
        )
        RETURNS @String TABLE
            (
                  Position INT IDENTITY PRIMARY KEY
                , StringValue VARCHAR(8000)
            )
        AS
        BEGIN
            DECLARE @index INT
            SET @index = -1
            WHILE (LEN(@text) >0)
                    BEGIN
                        SET @index = CHARINDEX(@delimiter, @text)
                            IF (@index = 0) AND (LEN(@text) > 0)
                                BEGIN
                                    INSERT INTO @string VALUES (@text)
                                    BREAK
                                END
                            IF (@index > 1)
                                BEGIN
                                    INSERT INTO @String VALUES (LEFT(@text, (@index-1)))
                                    SET @text = RIGHT(@text, (LEN(@text)-@index))
                                END
                            ELSE
                                    SET @text = RIGHT(@text, (LEN(@text)-@index))
                                END
                        RETURN
                    END
    GO

拆分、清理和分配给各自的 name_columns:

CREATE TABLE #t0 (rid INT IDENTITY, rawnames VARCHAR(8000));
GO

INSERT INTO #t0 VALUES ('JANE MIDDLETON John MIDDLETON'),
('SUE FRACARO BOB FRACARO'),
('TONY FRENCH'),
('JOHN EDUARDO OCHOA AND JANE ADRIANA OCHOA'),
('TONY JOHN CARPENTER TONYA CARPENTER');
GO

SELECT n.rid, n.rawnames, fn.StringValue AS Names, 
COUNT(*) OVER(PARTITION BY rawnames) AS wordcount,
ROW_NUMBER() OVER(PARTITION BY fn.stringvalue,rawnames ORDER BY fn.stringvalue) AS LastNameids,
fn.Position
INTO #t1
FROM #t0 n
cross apply dbo.Fn_Splittemp(n.rawnames, ' ') AS fn
GO

SELECT rid, rawnames, Position AS Pid, 
PersonName, LastName INTO #t2
FROM
(SELECT t.rid, t.rawnames, t.names AS Lastname, LTRIM(RTRIM(REPLACE(f.StringValue,'and',''))) AS PersonName, f.Position
FROM
    (SELECT replace(sqa.rawnames,sqa.Names,sqa.Names+',') AS delimstr , sqa.* 
        FROM #t1 sqa
        WHERE wordcount<=3 AND position = (SELECT MAX(position) from #t1 crq where crq.rid = sqa.rid)
    )t
cross apply dbo.Fn_Splittemp(delimstr,',') f
UNION ALL
SELECT b.rid, b.rawnames, b.Names AS Lastname, LTRIM(RTRIM(REPLACE(f.StringValue,'and',''))) AS PersonName, f.Position
FROM 
(SELECT replace(rawnames,names,names+',') AS delimstr, *
FROM #t1 
WHERE wordcount>3 AND LastNameids>1)b
cross apply dbo.Fn_Splittemp(delimstr,',') f
)sqt
GO

SELECT * INTO #t3 FROM #t2 cross apply dbo.Fn_Splittemp(personname, ' ');
GO

SELECT t.rawnames, fina.Firstname, fina.MiddleName, fina.LastName
FROM #t0 t
JOIN (
SELECT rid, pid, [1] AS Firstname, NULL AS MiddleName, [2] AS LastName 
FROM
(SELECT * FROM (SELECT rid, pid, position, stringvalue, 
COUNT(*) OVER(PARTITION BY rid, pid) AS cnt FROM #t3)a
WHERE a.cnt <=2)apiv
PIVOT
(MAX(stringvalue)
    FOR position IN ([1],[2])
    )piva
UNION ALL
SELECT rid, pid, [1] AS Firstname, [2] AS MiddleName, [3] AS LastName
FROM
(SELECT * FROM (SELECT rid, pid, position, stringvalue, 
COUNT(*) OVER(PARTITION BY rid, pid) AS cnt FROM #t3)a
WHERE a.cnt >2)apiv
PIVOT
(MAX(stringvalue)
    FOR position IN ([1],[2],[3])
    )piva
)fina
ON fina.rid = t.rid;

推荐阅读