删除重复记录

    技术2022-05-11  16

    CREATE TABLE #t1(ID INT NULL, VALUE VARCHAR(2))INSERT INTO #t1(ID, VALUE) VALUES (1,'aa')INSERT INTO #t1(ID, VALUE) VALUES (2,'bb')INSERT INTO #t1(ID, VALUE) VALUES (1,'aa')INSERT INTO #t1(ID, VALUE) VALUES (1,'aa')INSERT INTO #t1(ID, VALUE) VALUES (3,'cc')INSERT INTO #t1(ID, VALUE) VALUES (3,'cc')GO-- BINARY_CHECKSUM(<column names>): <column names> are columns that we want to compare duplicates for-- if you want to compare the full row then change BINARY_CHECKSUM(<column names>) -> BINARY_CHECKSUM(*)       -- for SQL Server 2000+ a loop-- save checksums and rowcounts for duplicatesSELECT BINARY_CHECKSUM(ID, VALUE) AS ChkSum, COUNT(*) AS Cnt INTO #t2 FROM #t1 GROUP BY BINARY_CHECKSUM(ID, VALUE) HAVING COUNT(*)>1DECLARE @ChkSum BIGINT, @rc INT-- get the first checksum and set the rowcount to the count - 1 -- because we want to leave one duplicateSELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2WHILE EXISTS (SELECT * FROM #t2)BEGIN        -- rowcount is one less than the duplicate rows count    SET ROWCOUNT @rc    DELETE FROM #t1 WHERE BINARY_CHECKSUM(ID, VALUE) = @ChkSum     -- remove the processed duplicate from the checksum table    DELETE #t2 WHERE ChkSum = @ChkSum     -- select the next duplicate rows to delete    SELECT TOP 1 @ChkSum = ChkSum, @rc = Cnt-1 FROM #t2    END SET ROWCOUNT 0GOSELECT * FROM #t1 -- for SQL Server 2005+ a cool CTE;WITH Numbered AS (    SELECT ROW_NUMBER() OVER (PARTITION BY ChkSum ORDER BY ChkSum) AS RN, *    FROM (             SELECT BINARY_CHECKSUM(ID, VALUE) AS ChkSum             FROM #t1         ) t) DELETE FROM Numbered WHERE RN > 1;GOSELECT * FROM #t1 DROP TABLE #t1;DROP TABLE #t2;


    最新回复(0)