It looks like from the execution plan the IN version is faster than EXISTS version
I'm thinking that EXISTS query is faster, for it eagerly checks the conditions. The IN query though it looks intuitive, I feel that it seems it resolves the final conditions' result very late; that is, from inside out, I perceived that IN are slower because the next condition need to wait for the result of its inner conditions.
Though from the look of the execution plans below, the IN is faster; it has lower cost.
Part of me is wanting to use the IN version, it looks easy and intuitive for the next maintainer, and the execution plan below seems to indicate that it is faster than EXISTS. But another part of me is wanting to use the EXISTS, because I just feel it is faster, but the execution plan below seems to contradict that perception.
Which of the two queries below is faster, is it the IN version or the EXISTS version?
IN version:
explain analyze
select ceil(
( count(distinct company_rec_id)::numeric + 1 )
/ ((1)) )::int
from
parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
(((E'')) <> '' and to_tsvector(c.company) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((0)) <> 0 and ord.parcel_number = ((0)))
or ((0)) = 0
)
and parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from parcel_application_shoe
where
(
(((E'')) <> '' and to_tsvector(extract_words(shoe_description)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(order_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(style_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
)
)
"Aggregate (cost=1060.73..1060.75 rows=1 width=37) (actual time=29.028..29.028 rows=1 loops=1)"
" -> Hash Join (cost=880.43..1053.04 rows=3074 width=37) (actual time=13.261..16.365 rows=3074 loops=1)"
" Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
" -> Hash Join (cost=864.79..995.14 rows=3074 width=37) (actual time=13.115..15.027 rows=3074 loops=1)"
" Hash Cond: ((ord.parcel_rec_id)::text = (parcel_application.parcel_rec_id)::text)"
" -> Seq Scan on parcel ord (cost=0.00..78.87 rows=3087 width=74) (actual time=0.005..0.373 rows=3087 loops=1)"
" -> Hash (cost=826.37..826.37 rows=3074 width=37) (actual time=13.102..13.102 rows=3074 loops=1)"
" -> HashAggregate (cost=795.63..826.37 rows=3074 width=37) (actual time=11.835..12.281 rows=3074 loops=1)"
" -> Hash Join (cost=541.34..787.59 rows=3218 width=37) (actual time=7.076..10.286 rows=3218 loops=1)"
" Hash Cond: ((parcel_application.parcel_application_rec_id)::text = (parcel_application_shoe.parcel_application_rec_id)::text)"
" -> Seq Scan on parcel_application (cost=0.00..122.18 rows=3218 width=74) (actual time=0.004..0.710 rows=3218 loops=1)"
" -> Hash (cost=501.12..501.12 rows=3218 width=37) (actual time=7.061..7.061 rows=3218 loops=1)"
" -> HashAggregate (cost=468.94..501.12 rows=3218 width=37) (actual time=5.721..6.220 rows=3218 loops=1)"
" -> Seq Scan on parcel_application_shoe (cost=0.00..442.95 rows=10395 width=37) (actual time=0.004..2.318 rows=10395 loops=1)"
" -> Hash (cost=11.95..11.95 rows=295 width=37) (actual time=0.136..0.136 rows=295 loops=1)"
" -> Seq Scan on company c (cost=0.00..11.95 rows=295 width=37) (actual time=0.013..0.068 rows=295 loops=1)"
"Total runtime: 29.122 ms"
EXISTS version:
explain analyze
select ceil(
( count(distinct company_rec_id)::numeric + 1 )
/ ((1)) )::int from
parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
(((E'')) <> '' and to_tsvector(c.company) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((0)) <> 0 and ord.parcel_number = ((0)))
or ((0)) = 0
)
and exists
(
select * from parcel_application pa
where pa.parcel_rec_id = ord.parcel_rec_id
and
exists
(
select * from parcel_application_shoe ord_item
where
ord_item.parcel_application_rec_id = pa.parcel_application_rec_id
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.shoe_description)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.order_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
and
(
(((E'')) <> '' and to_tsvector(extract_words(ord_item.style_number)) @@ plainto_tsquery(extract_words(((E'')))))
or ((E'')) = ''
)
)
)
"Aggregate (cost=17773.13..17773.16 rows=1 width=37) (actual time=34.519..34.520 rows=1 loops=1)"
" -> Hash Join (cost=17636.59..17769.11 rows=1609 width=37) (actual time=17.174..20.426 rows=3074 loops=1)"
" Hash Cond: ((ord.client_rec_id)::text = (c.company_rec_id)::text)"
" -> Hash Join (cost=17620.95..17731.35 rows=1609 width=37) (actual time=16.882..18.862 rows=3074 loops=1)"
" Hash Cond: ((ord.parcel_rec_id)::text = (pa.parcel_rec_id)::text)"
" -> Seq Scan on parcel ord (cost=0.00..78.87 rows=3087 width=74) (actual time=0.006..0.409 rows=3087 loops=1)"
" -> Hash (cost=17601.74..17601.74 rows=1537 width=37) (actual time=16.858..16.858 rows=3074 loops=1)"
" -> HashAggregate (cost=17586.37..17601.74 rows=1537 width=37) (actual time=15.015..15.535 rows=3074 loops=1)"
" -> Seq Scan on parcel_application pa (cost=0.00..17582.35 rows=1609 width=37) (actual time=10.040..12.440 rows=3218 loops=1)"
" Filter: (alternatives: SubPlan 1 or hashed SubPlan 2)"
" SubPlan 1"
" -> Index Scan using fki_parcel_application_shoe__parcel_application on parcel_application_shoe ord_item (cost=0.00..16.28 rows=3 width=0) (never executed)"
" Index Cond: ((parcel_application_rec_id)::text = ($0)::text)"
" SubPlan 2"
" -> Seq Scan on parcel_application_shoe ord_item (cost=0.00..442.95 rows=10395 width=37) (actual time=0.005..4.482 rows=10395 loops=1)"
" -> Hash (cost=11.95..11.95 rows=295 width=37) (actual time=0.284..0.284 rows=295 loops=1)"
" -> Seq Scan on company c (cost=0.00..11.95 rows=295 width=37) (actual time=0.010..0.155 rows=295 loops=1)"
"Total runtime: 34.645 ms"
This is the final query I end up using, the IN version. I can only perceived marginal performance difference between IN and EXISTS versions. I'm thinking more about the readability for the next maintainer, hence I used IN
select ord.parcel_number, ord.received_date, c.company
from parcel ord
join company c on c.company_rec_id = ord.client_rec_id
where
(
:_company = ''
or
to_tsvector(c.company) @@ plainto_tsquery(extract_words(:_company))
)
and
(
:_fttsc_num = 0
or ord.parcel_number = :_fttsc_num
)
and
(
(:_item = '' and :_order_num = '' and :_style_num = '')
or
ord.parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from parcel_application_shoe
where
(
:_item = ''
or to_tsvector(extract_words(shoe_description)) @@ plainto_tsquery(extract_words(:_item))
)
and
(
:_order_num = ''
or to_tsvector(extract_words(order_number)) @@ plainto_tsquery(extract_words(:_order_num))
)
and
(
:_style_num = ''
or to_tsvector(extract_words(style_number)) @@ plainto_tsquery(extract_words(:_style_num))
)
) -- parcel_application_rec_id IN
) -- parcel_rec_id IN
)
and
-- material filter...
(
(:_material = '')
or
-- implied material <> ''
parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from mlist
where mlist_rec_id in
(
select mlist_rec_id
from mlist_detail
join material m using(material_rec_id)
where to_tsvector(extract_words(m.material)) @@ plainto_tsquery(extract_words(:_material))
)
)
)
)
-- ...material filter
and
-- parameter filter...
(
(:_parameter = '')
or
-- implied parameter <> ''
parcel_rec_id in
(
select parcel_rec_id
from parcel_application
where parcel_application_rec_id in
(
select parcel_application_rec_id
from mlist
where mlist_rec_id in
(
select mlist_rec_id
from mlist_detail
where mlist_detail_rec_id in
(
select mlist_detail_rec_id
from mlist_detail_parameter
join parameter p using(parameter_rec_id)
where to_tsvector(extract_words(p.parameter)) @@ plainto_tsquery(extract_words(:_parameter))
)
)
)
)
)
-- ...parameter filter
order by ord.received_date