I have a table with some duplicate entries. I have to discard all but one, and then update this latest one. I've tried with a temporary table and a while statement, in this way:
CREATE TABLE #tmp_ImportedData_GenericData
(
Id int identity(1,1),
tmpCode varchar(255) NULL,
tmpAlpha3Code varchar(50) NULL,
tmpRelatedYear int NOT NULL,
tmpPreviousValue varchar(255) NULL,
tmpGrowthRate varchar(255) NULL
)
INSERT INTO #tmp_ImportedData_GenericData
SELECT
MCS_ImportedData_GenericData.Code,
MCS_ImportedData_GenericData.Alpha3Code,
MCS_ImportedData_GenericData.RelatedYear,
MCS_ImportedData_GenericData.PreviousValue,
MCS_ImportedData_GenericData.GrowthRate
FROM MCS_ImportedData_GenericData
INNER JOIN
(
SELECT CODE, ALPHA3CODE, RELATEDYEAR, COUNT(*) AS NUMROWS
FROM MCS_ImportedData_GenericData AS M
GROUP BY M.CODE, M.ALPHA3CODE, M.RELATEDYEAR
HAVING count(*) > 1
) AS M2 ON MCS_ImportedData_GenericData.CODE = M2.CODE
AND MCS_ImportedData_GenericData.ALPHA3CODE = M2.ALPHA3CODE
AND MCS_ImportedData_GenericData.RELATEDYEAR = M2.RELATEDYEAR
WHERE
(MCS_ImportedData_GenericData.PreviousValue <> 'INDEFINITO')
-- SELECT * from #tmp_ImportedData_GenericData
-- DROP TABLE #tmp_ImportedData_GenericData
DECLARE @counter int
DECLARE @rowsCount int
SET @counter = 1
SELECT @rowsCount = count(*) from #tmp_ImportedData_GenericData
-- PRINT @rowsCount
WHILE @counter < @rowsCount
BEGIN
SELECT
@Code = tmpCode,
@Alpha3Code = tmpAlpha3Code,
@RelatedYear = tmpRelatedYear,
@OldValue = tmpPreviousValue,
@GrowthRate = tmpGrowthRate
FROM
#tmp_ImportedData_GenericData
WHERE
Id = @counter
DELETE FROM MCS_ImportedData_GenericData
WHERE
Code = @Code
AND Alpha3Code = @Alpha3Code
AND RelatedYear = @RelatedYear
AND PreviousValue <> 'INDEFINITO' OR PreviousValue IS NULL
UPDATE
MCS_ImportedData_GenericData
SET
PreviousValue = @OldValue, GrowthRate = @GrowthRate
WHERE
Code = @Code
AND Alpha3Code = @Alpha3Code
AND RelatedYear = @RelatedYear
AND MCS_ImportedData_GenericData.PreviousValue ='INDEFINITO'
SET @counter = @counter + 1
END
but it takes too long time, even if there are just 20000 - 30000 rows to process.
Does anyone has some suggestions in order to improve performances?
Thanks in advance!