SQL Problems with old style outer JOINs

Problems with old style outer JOINs

-- examine some issues with the old-style \
-- outer join using a simple example of only two tables, 
-- Customers and Orders:
DROP TABLE Orders
GO
DROP TABLE Customers
GO
CREATE TABLE Customers
(
Cust_ID      int  PRIMARY KEY,
Cust_Name    char(20)
)
GO


CREATE TABLE Orders
(
OrderID    int    PRIMARY KEY,
Cust_ID    int    REFERENCES Customers(Cust_ID)
)
GO

INSERT Customers VALUES (1, 'Cust 1')
INSERT Customers VALUES (2, 'Cust 2')
INSERT Customers VALUES (3, 'Cust 3')
INSERT Orders VALUES (10001, 1)
INSERT Orders VALUES (20001, 2)
GO
--  At a glance, in the simplest case, 
-- the new-style and old-style syntax appear 
-- to work the same. Here’s the new syntax:
SELECT 
'Customers.Cust_ID'=Customers.Cust_ID, 
	Customers.Cust_Name,
    'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers LEFT JOIN Orders
    ON Customers.Cust_ID=Orders.Cust_ID


--And here’s the old-style syntax:
SELECT 'Customers.Cust_ID'=Customers.Cust_ID, 
	Customers.Cust_Name,
    'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers, Orders 
WHERE Customers.Cust_ID *= Orders.Cust_ID

-- As you begin to add restrictions, things get tricky. 
-- What if you want to filter out Cust 2? 
-- With the new syntax it’s easy, but remember 
-- not to filter out the row with NULL 
-- that the outer join just preserved!

SELECT 'Customers.Cust_ID'=Customers.Cust_ID, 
	Customers.Cust_Name,
'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers LEFT JOIN Orders
    ON Customers.Cust_ID=Orders.Cust_ID 
WHERE Orders.Cust_ID <> 2 OR Orders.Cust_ID IS NULL

--  Now try to do this query using the old-style syntax 
-- and filter out Cust 2:

SELECT 'Customers.Cust_ID'=Customers.Cust_ID, 
	Customers.Cust_Name,
'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers, Orders 
WHERE Customers.Cust_ID *= Orders.Cust_ID
AND (Orders.Cust_ID <> 2 OR Orders.Cust_ID IS NULL)


-- Notice that this time, we don’t get rid of Cust 2. 
-- The check for NULL occurs before the JOIN, 
-- so the outer-join operation puts Cust 2 back. 
-- This result might be less than intuitive, 
-- but at least we can explain and defend it. 
-- That’s not always the case, as you’ll see in a moment.
-- If you look at the preceding query, 
-- you might think that we should have filtered out 
-- Customers.Cust_ID rather than Orders.Cust_ID. 
-- How did we miss that? 
-- Surely this query will fix the problem:

SELECT 'Customers.Cust_ID'=Customers.Cust_ID, Customers.Cust_Name,
'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers, Orders 
WHERE Customers.Cust_ID *= Orders.Cust_ID
AND (Customers.Cust_ID <> 2 OR Orders.Cust_ID IS NULL)

-- Oops! Same result. 
-- The problem here is that Orders.Cust_ID IS NULL 
-- is now being applied after the outer join, 
-- so the row is presented again. 
-- If we’re careful and understand exactly 
-- how the old outer join is processed, 
-- we can get the results we want 
-- with the old-style syntax for this query. 
-- We need to understand that the OR Orders.Cust_ID IS NULL 
-- puts back Cust_ID 2, so just take that out. 
-- Here is the code:

 SELECT 'Customers.Cust_ID'=Customers.Cust_ID, Customers.Cust_Name,
    'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers, Orders 
WHERE Customers.Cust_ID *= Orders.Cust_ID
AND Customers.Cust_ID <> 2


-- Finally! This is the result we want. 
-- And if you really think about it, 
-- the semantics are even understandable 
-- (although different from the new style). 
-- So maybe this is much ado about nothing; 
-- all we have to do is understand how it works, right? 
-- Wrong. Besides the issues of joins with 
-- more than two tables and the lack of a full outer join, 
-- we also can’t effectively deal with subqueries and views. 
-- For example, let’s try creating a view with 
-- the old-style outer join:
GO
DROP VIEW Cust_old_OJ
GO
CREATE VIEW Cust_old_OJ AS
(SELECT Orders.Cust_ID, Customers.Cust_Name
FROM Customers, Orders 
WHERE Customers.Cust_ID *= Orders.Cust_ID)
GO
-- A simple select from the view looks fine:
SELECT * FROM Cust_old_OJ


-- But restricting from this view doesn’t 
-- seem to make sense:
SELECT * FROM Cust_old_OJ WHERE Cust_ID <> 2
    AND Cust_ID IS NOT NULL

-- If we expand the view to the full select and 
-- we realize that Cust_ID is Orders.Cust_ID, 
-- not Customers.Cust_ID, perhaps we can understand 
-- why this happened. But we still can’t filter out 
-- those rows!
-- In contrast, if we create the view with 
-- the new syntax and correct semantics, 
-- it works exactly as expected:
GO
DROP VIEW Cust_new_OJ
GO
CREATE VIEW Cust_new_OJ AS
(SELECT Orders.Cust_ID, Customers.Cust_Name
FROM Customers LEFT JOIN Orders 
    ON Customers.Cust_ID=Orders.Cust_ID )
GO

SELECT * FROM Cust_new_OJ WHERE Cust_ID <> 2 AND Cust_ID IS NOT NULL


-- In these examples, the new syntax performed 
-- the outer join and then applied the restrictions 
-- in the WHERE clause to the result. In contrast, 
-- the old style applied the WHERE clause to the 
-- tables being joined and then performed the outer join, 
-- which can reintroduce NULL rows. 
-- This is why the results often seemed bizarre. 
-- However, if that behavior is what you want, 
-- you could apply the criteria in the JOIN clause 
-- instead of in the WHERE clause.

-- The following example uses the new syntax to 
-- mimic the old behavior. The WHERE clause is shown 
-- here simply as a placeholder to make clear that 
-- the statement Cust_ID <> 2 is in the JOIN section, 
-- not in the WHERE section.

SELECT 'Customers.Cust_ID'=Customers.Cust_ID, Customers.Cust_Name,
    'Orders.Cust_ID'=Orders.Cust_ID
FROM Customers LEFT JOIN Orders
    ON Customers.Cust_ID=Orders.Cust_ID 
    AND Orders.Cust_ID <> 2
WHERE 1=1


-- As you can see, the row for Cust 2 was 
-- filtered out from the Orders table before the join, 
-- but because it was NULL, 
-- it was reintroduced by the OUTER JOIN operation.