Before I can advise anything, I need to know the answer to this question:
name phone email
John 555-00-00 [email protected]
John 555-00-01 [email protected]
John 555-00-01 [email protected]
What COUNT(*) do you want for this data?
Update:
If you just want to know whether a record has any duplicates, use this:
-- Returns every row for which at least one *other* row (different id) has the
-- same name and a matching phone or a matching email.
WITH q AS (
    -- Inline sample rows standing in for the real table.
    SELECT 1 AS id, 'John' AS name, '555-00-00' AS phone, '[email protected]' AS email
    UNION ALL
    SELECT 2 AS id, 'John', '555-00-01', '[email protected]'
    UNION ALL
    SELECT 3 AS id, 'John', '555-00-01', '[email protected]'
    UNION ALL
    SELECT 4 AS id, 'James', '555-00-00', '[email protected]'
    UNION ALL
    SELECT 5 AS id, 'James', '555-00-01', '[email protected]'
)
SELECT
    cur.id,
    cur.name,
    cur.phone,
    cur.email
FROM q AS cur
WHERE EXISTS (
    -- Only the existence of a matching row matters, not its contents.
    SELECT 1
    FROM q AS other
    WHERE other.name = cur.name
        AND other.id <> cur.id  -- a row must not count as its own duplicate
        AND (other.phone = cur.phone OR other.email = cur.email)
);
It's more efficient, but doesn't tell you where the duplicate chain started.
This query selects all entries along with a special field, chainid, that indicates where the duplicate chain started.
-- Lists the rows of each duplicate chain, tagged with chainid: the id of the
-- row the chain starts from. d is 1 for the chain's starting row and grows by
-- 1 with every recursion step; a row that can be reached along several paths
-- appears once per path.
WITH q AS (
    -- Inline sample rows standing in for the real table.
    SELECT 1 AS id, 'John' AS name, '555-00-00' AS phone, '[email protected]' AS email
    UNION ALL
    SELECT 2 AS id, 'John', '555-00-01', '[email protected]'
    UNION ALL
    SELECT 3 AS id, 'John', '555-00-01', '[email protected]'
    UNION ALL
    SELECT 4 AS id, 'James', '555-00-00', '[email protected]'
    UNION ALL
    SELECT 5 AS id, 'James', '555-00-01', '[email protected]'
),
dup AS (
    -- Anchor: every row opens a candidate chain of its own.
    SELECT
        id AS chainid,
        id,
        name,
        phone,
        email,
        1 AS d
    FROM q
    UNION ALL
    -- Recursive step: extend a chain to any row with a larger id that has the
    -- same name and a matching phone or email. The strictly increasing id
    -- keeps the recursion finite.
    SELECT
        prev.chainid,
        nxt.id,
        nxt.name,
        nxt.phone,
        nxt.email,
        prev.d + 1
    FROM dup AS prev
    INNER JOIN q AS nxt
        ON nxt.id > prev.id
        AND nxt.name = prev.name
        AND (nxt.phone = prev.phone OR nxt.email = prev.email)
),
chains AS (
    -- Drop chains whose starting row was already reached from a chain with a
    -- smaller chainid, so only the earliest start of each chain survives.
    SELECT
        kept.chainid,
        kept.id,
        kept.name,
        kept.phone,
        kept.email,
        kept.d
    FROM dup AS kept
    WHERE NOT EXISTS (
        SELECT 1
        FROM dup AS earlier
        WHERE earlier.chainid < kept.chainid
            AND earlier.id = kept.chainid
    )
)
SELECT
    chainid,
    id,
    name,
    phone,
    email,
    d
FROM chains
ORDER BY
    chainid;