Hi,
I've proto-typed a solution to match customer records from varying sources. There may be up to 1million records in the customers table so I need to progress this into a set-based solution for efficiency (if that’s possible). Can any one clear the haze for me?
The select SQL below is just one of 5 queries that will run and to try and match the customer records. The idea is that when certain records are matched they will be inserted into the match table with a common id/BureauID that links them together. As new customer records arrive in the database they may match with an existing record that already has a BureauID so I need to examine the match records to extract the ID if it exists.
I had initially proto-typed a set-based query using ROW_NUMBER to assign a number to those records that satisfied the criteria* - with the idea of moving down the resultset and inserting into the match table on each break in the number, but the introduction of synonyms on Surname, Firstname and SecondName threw out the PARTITION BY clause….and now I'm stuck.
Any ideas greatly appreciated.
* Matching criteria
Surname (equal or synonym) AND
Firstname (equal or synonym) AND
SecondName (equal or synonym or Initial) AND
DateOfBirth (minimum 6 out of 8 - looking for transposed digits) AND
Gender (equal) AND
Streetname (equal to first 6 characters) OR
PostCode (equal)
SELECT TOP (100) PERCENT t.SubscriberID, t.CustomerID, t.Surname, t.FirstName, t.SecondName, t.Initial, t.SecondName_Re stOf, t.DateOfBirth, t.GenderID,
t.PostCode, t.Street6, dbo.tblMatch.Bu reauID
FROM dbo.tblCustomer AS t LEFT OUTER JOIN
dbo.tblMatch ON t.CustomerID = dbo.tblMatch.Cu stomerID AND t.SubscriberID = dbo.tblMatch.Su bscriberID
WHERE (t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame
WHERE (Name = @Firstname))) AND (t.SecondName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 1
WHERE (Name = @SecondName))) AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND
(t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_4
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 5
WHERE (Name = @Firstname))) AND (t.SecondName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 1
WHERE (Name = @SecondName))) AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND
(t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_3
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 4
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_3
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 4
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_2
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 3
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (t.SecondName_R estOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_2
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 3
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (t.SecondName_R estOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_1
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 2
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = t.SecondName_Re stOf) AND
(dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_1
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 2
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = t.SecondName_Re stOf) AND
(dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode)
CREATE TABLE [dbo].[tblCustomer](
[SubscriberID] [nvarchar](4) NOT NULL,
[CustomerID] [int] IDENTITY(1,1) NOT NULL,
[Surname] [varchar](35) NOT NULL,
[FirstName] [varchar](35) NOT NULL,
[SecondName] [varchar](35) NULL,
[SecondName_Rest Of] [varchar](35) NULL,
[Initial] [char](1) NULL,
[GenderID] [char](1) NOT NULL,
[DateOfBirth] [int] NOT NULL,
[Employer] [nvarchar](50) NULL,
[Occupation] [nvarchar](50) NULL,
[Street6] [varchar](6) NULL,
[PostCode] [int] NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[tblMatch](
[BureauID] [bigint] NOT NULL,
[SubscriberID] [int] NOT NULL,
[CustomerID] [int] NOT NULL,
[Date] [datetime] NOT NULL,
[UserID] [nvarchar](10) NOT NULL,
) ON [PRIMARY]
CREATE TABLE [dbo].[NameX_Surname](
[Name] [varchar](50) NOT NULL,
[NameMatch] [varchar](50) NOT NULL,
[Score] [int] NOT NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[NameX_Forename](
[Name] [varchar](50) NULL,
[NameMatch] [varchar](50) NULL,
[Score] [int] NULL
) ON [PRIMARY]
I've proto-typed a solution to match customer records from varying sources. There may be up to 1million records in the customers table so I need to progress this into a set-based solution for efficiency (if that’s possible). Can any one clear the haze for me?
The select SQL below is just one of 5 queries that will run and to try and match the customer records. The idea is that when certain records are matched they will be inserted into the match table with a common id/BureauID that links them together. As new customer records arrive in the database they may match with an existing record that already has a BureauID so I need to examine the match records to extract the ID if it exists.
I had initially proto-typed a set-based query using ROW_NUMBER to assign a number to those records that satisfied the criteria* - with the idea of moving down the resultset and inserting into the match table on each break in the number, but the introduction of synonyms on Surname, Firstname and SecondName threw out the PARTITION BY clause….and now I'm stuck.
Any ideas greatly appreciated.
* Matching criteria
Surname (equal or synonym) AND
Firstname (equal or synonym) AND
SecondName (equal or synonym or Initial) AND
DateOfBirth (minimum 6 out of 8 - looking for transposed digits) AND
Gender (equal) AND
Streetname (equal to first 6 characters) OR
PostCode (equal)
SELECT TOP (100) PERCENT t.SubscriberID, t.CustomerID, t.Surname, t.FirstName, t.SecondName, t.Initial, t.SecondName_Re stOf, t.DateOfBirth, t.GenderID,
t.PostCode, t.Street6, dbo.tblMatch.Bu reauID
FROM dbo.tblCustomer AS t LEFT OUTER JOIN
dbo.tblMatch ON t.CustomerID = dbo.tblMatch.Cu stomerID AND t.SubscriberID = dbo.tblMatch.Su bscriberID
WHERE (t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame
WHERE (Name = @Firstname))) AND (t.SecondName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 1
WHERE (Name = @SecondName))) AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND
(t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_4
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 5
WHERE (Name = @Firstname))) AND (t.SecondName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 1
WHERE (Name = @SecondName))) AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND
(t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_3
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 4
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_3
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 4
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_2
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 3
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (t.SecondName_R estOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_2
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 3
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (t.SecondName_R estOf = '') AND (dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5)
AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_1
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 2
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = t.SecondName_Re stOf) AND
(dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND (t.Street6 = @Street6) OR
(t.Surname IN
(SELECT NameMatch
FROM dbo.NameX_Surna me AS NameX_Surname_1
WHERE (Name = @Surname))) AND (t.FirstName IN
(SELECT NameMatch
FROM dbo.NameX_Foren ame AS NameX_Forename_ 2
WHERE (Name = @Firstname))) AND (@Initial = t.Initial) AND (@SecondName_Re stOf = t.SecondName_Re stOf) AND
(dbo.fnCheckDOB (t.DateOfBirth, @DateOfBirth) > 5) AND (t.GenderID = @GenderID) AND (t.PostCode = @PostCode)
CREATE TABLE [dbo].[tblCustomer](
[SubscriberID] [nvarchar](4) NOT NULL,
[CustomerID] [int] IDENTITY(1,1) NOT NULL,
[Surname] [varchar](35) NOT NULL,
[FirstName] [varchar](35) NOT NULL,
[SecondName] [varchar](35) NULL,
[SecondName_Rest Of] [varchar](35) NULL,
[Initial] [char](1) NULL,
[GenderID] [char](1) NOT NULL,
[DateOfBirth] [int] NOT NULL,
[Employer] [nvarchar](50) NULL,
[Occupation] [nvarchar](50) NULL,
[Street6] [varchar](6) NULL,
[PostCode] [int] NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[tblMatch](
[BureauID] [bigint] NOT NULL,
[SubscriberID] [int] NOT NULL,
[CustomerID] [int] NOT NULL,
[Date] [datetime] NOT NULL,
[UserID] [nvarchar](10) NOT NULL,
) ON [PRIMARY]
CREATE TABLE [dbo].[NameX_Surname](
[Name] [varchar](50) NOT NULL,
[NameMatch] [varchar](50) NOT NULL,
[Score] [int] NOT NULL
) ON [PRIMARY]
CREATE TABLE [dbo].[NameX_Forename](
[Name] [varchar](50) NULL,
[NameMatch] [varchar](50) NULL,
[Score] [int] NULL
) ON [PRIMARY]