I'm doing a relatively complex CTE query with an expensive user defined scalar function call.
The where clauses of the CTE filters most of the large table data, and returns relatively small result, so calculating an expensive field is not a problem, if calculated on the resultset.
SQL server is doing a good job if not evaluating the expensive column, if not touched by any of the predicates inside CTE subqueries, it's evaluated only for the resultset.
The question is, can i rely on this nice behaviour of the optimizer in this case, or things can go wrong when the plan is rebuilt?
Here is a test code.
use tempdb;
go
/****** Object: UserDefinedFunction [dbo].[expensive] Script Date: 01/15/2010 18:43:06 ******/
IF EXISTS (SELECT * FROM sys.objects WHERE object_id = OBJECT_ID(N'[dbo].[expensive]') AND type in (N'FN', N'IF', N'TF', N'FS', N'FT'))
DROP FUNCTION [dbo].[expensive]
go
-- used to model expensive user defined function.
-- inexpensive in case of @i = 1
create function dbo.expensive(@i int)
returns int
as
begin
if( @i = 1 ) begin return @i; -- inexpensive in this case
end;
declare @N bigint
declare @ret bigint
set @N = 17; -- will generate a set of 2^N
declare @tab table(num int);
with gig as
(
select 1 as num
union all
select g.num + 1 as num from gig g
where g.num < @N
union all
select g.num + 1 as num from gig g
where g.num < @N
)
select @ret = count(num) from gig;
return @ret;
end
go
declare @tab table(i int);
insert into @tab select 1 union select 2 union select 3;
select CURRENT_TIMESTAMP;
with source as
(
-- some really complex stuff that has an expensive calcutated scalar
select c.i, c.caclulated from @tab t
join
(select i, dbo.expensive(i) as caclulated from @tab) as c
on t.i = c.i
)
select * from source where
i = 1; -- this query is inexpensive, because do not touch anything but 1
select CURRENT_TIMESTAMP;
-- this one is expensive
select dbo.expensive(2)
select CURRENT_TIMESTAMP;