You can probably do better than this using the SQL# functions, but in straight T-SQL, here you go.
The main idea here is to parse out the last segment/token in the name using REVERSE
and PATINDEX
, and then match it to a list of known suffixes.
First some test data:
IF OBJECT_ID('tempdb..#names') IS NOT NULL DROP TABLE #names
IF OBJECT_ID('tempdb..#suffixes') IS NOT NULL DROP TABLE #suffixes
CREATE TABLE #names (name VARCHAR(32))
CREATE TABLE #suffixes (suffix VARCHAR(32))
GO
INSERT #names VALUES ('BRUNNING, II' )
INSERT #names VALUES ('BURCH II' )
INSERT #names VALUES ('BUSS, JR.' )
INSERT #names VALUES ('CANI III' )
INSERT #names VALUES ('CHRISTIAN,SR' )
INSERT #names VALUES ('COLVIN Jr' )
INSERT #names VALUES ('COWHERD,JR.' )
INSERT #names VALUES ('BILLY BOB' )
INSERT #names VALUES ('JOHNNY' )
INSERT #suffixes VALUES ('II' )
INSERT #suffixes VALUES ('III')
INSERT #suffixes VALUES ('JR' )
INSERT #suffixes VALUES ('SR' )
Then, an inline SELECT
version. Notice the use of NULLIF
to control for SUBSTRING
errors.
SELECT
name
, left_segments
, right_segment
, new_name = CASE WHEN b.suffix IS NOT NULL THEN a.left_segments ELSE a.name END
, b.suffix
FROM (
SELECT
name
, left_segments = CASE WHEN left_segments LIKE '%[ ,]' THEN LEFT(left_segments,LEN(left_segments)-1) ELSE left_segments END
, right_segment = CASE WHEN right_segment LIKE '%[.]' THEN LEFT(right_segment,LEN(right_segment)-1) ELSE right_segment END
FROM (
SELECT *
, left_segments = RTRIM(LEFT(RTRIM(name),LEN(name)-NULLIF(PATINDEX('%[ ,]%',REVERSE(RTRIM(name))),0)))
, right_segment = RIGHT(RTRIM(name),NULLIF(PATINDEX('%[ ,]%',REVERSE(RTRIM(name))),0)-1)
FROM #names
) a
) a
LEFT JOIN #suffixes b ON a.right_segment = b.suffix
Alternately, UPDATE
w/ local vars:
ALTER TABLE #names ADD
left_segments VARCHAR(64)
, right_segment VARCHAR(64)
GO
DECLARE
@name VARCHAR(64)
, @len INT
, @last_delim INT
, @left_segments VARCHAR(64)
, @right_segment VARCHAR(64)
UPDATE #names SET
@name = RTRIM(name)
, @len = LEN(@name)
, @last_delim = @len-NULLIF(PATINDEX('%[ ,]%',REVERSE(@name)),0)
, @left_segments = RTRIM(LEFT(@name,@last_delim))
, @right_segment = RIGHT(@name,@len-@last_delim-1)
, @left_segments = CASE WHEN @left_segments LIKE '%[ ,]' THEN LEFT(@left_segments,LEN(@left_segments)-1) ELSE @left_segments END
, @right_segment = CASE WHEN @right_segment LIKE '%[.]' THEN LEFT(@right_segment,LEN(@right_segment)-1) ELSE @right_segment END
, left_segments = @left_segments
, right_segment = @right_segment
SELECT a.*
, new_name = CASE WHEN b.suffix IS NOT NULL THEN a.left_segments ELSE a.name END
, suffix = b.suffix
FROM #names a LEFT JOIN #suffixes b ON a.right_segment = b.suffix
The inline SELECT
is fairly convenient, but difficult to read and troubleshoot. I prefer the UPDATE
with local vars for anything I might have to return to later. Plus, it makes individual edits easier to apply.
EDIT, SELECT
method, slightly edited, and wrapped in an inline table-valued function. A inline TVF should be more efficient than a scalar UDF, and you get multiple return values to boot.
CREATE FUNCTION dbo.ParseNameAndSuffix (@name VARCHAR(64), @ValidSuffixes VARCHAR(512))
RETURNS TABLE AS RETURN (
SELECT
left_segments
, right_segment
, new_name = CASE WHEN CHARINDEX(';'+right_segment+';',';'+@ValidSuffixes+';') > 0 THEN a.left_segments ELSE a.name END
, suffix = CASE WHEN CHARINDEX(';'+right_segment+';',';'+@ValidSuffixes+';') > 0 THEN a.right_segment END
FROM (
SELECT
name
, left_segments = CASE WHEN left_segments LIKE '%[ ,]' THEN LEFT(left_segments,LEN(left_segments)-1) ELSE left_segments END
, right_segment = CASE WHEN right_segment LIKE '%[.]' THEN LEFT(right_segment,LEN(right_segment)-1) ELSE right_segment END
FROM (
SELECT name
, left_segments = RTRIM(LEFT(name,LEN(name)-NULLIF(PATINDEX('%[ ,]%',REVERSE(name)),0)))
, right_segment = RIGHT(name,NULLIF(PATINDEX('%[ ,]%',REVERSE(name)),0)-1)
FROM (SELECT name = LTRIM(RTRIM(@name))) a
) a
) a
)
GO
SELECT * FROM #names a
CROSS APPLY dbo.ParseNameAndSuffix(a.name,'II;III;JR;SR') b