tags:

views:

44

answers:

1

I have temporary memory tables A and B. Each contains one column of integer values. I need to find all values which are in A but not in B.

The problem is that it's very slow, due to the fact (I think) that memory tables use hash keys rather than ordered keys.

How can I perform this efficiently? Currently I'm using: SELECT val FROM tableA WHERE val NOT IN (SELECT val FROM tableB)

The definition of each table is: CREATE TABLE tableA (val INT, PRIMARY KEY USING HASH (val)) ENGINE = MEMORY;

+1  A: 
-- Anti-join: return every val in tableA that has no matching val in tableB.
-- Unmatched rows come back from the left join with b.val set to null.
select a.val
from tableA as a
left join tableB as b
  on b.val = a.val
where b.val is null;

Some additional testing on 250K rows reveals that there's not much difference between the hash and btree indexes:

-- Reload tableA and tableB, then run the hash-index variant twice.
-- The trailing comment on each call is the timing the author recorded
-- (presumably elapsed time as h:mm:ss.mmm - confirm against the client used).
call load_test_data();

call test_memory_tables_hash(); -- 0:00:00.597
call test_memory_tables_hash(); -- 0:00:00.362


-- Reload the same data, then run the btree-index variant twice.
call load_test_data();

call test_memory_tables_btree(); -- 0:00:00.460
call test_memory_tables_btree(); -- 0:00:00.429

full testing script:

-- Persistent source tables for the benchmark. Each holds a single unsigned
-- integer column that is also the primary key. The test procedures copy
-- these rows into temporary MEMORY tables.
drop table if exists tableA;
create table tableA (
  val int unsigned not null,
  primary key (val)
) engine = innodb;

drop table if exists tableB;
create table tableB (
  val int unsigned not null,
  primary key (val)
) engine = innodb;


drop procedure if exists load_test_data;

delimiter #

-- load_test_data: rebuilds the benchmark fixtures.
--   Empties tableA and tableB, then walks i over 0 .. max-1 and inserts
--   every even i into tableA and every multiple of 3 into tableB.
--   Takes no parameters and returns no result set.
create procedure load_test_data()
proc_main:begin

  declare i int unsigned default 0;
  declare max int unsigned default 250000;  -- exclusive upper bound for i

  truncate table tableA;
  truncate table tableB;

  -- Batch all inserts into one explicit transaction. Unlike
  -- "set autocommit = 0", this does not change the session autocommit
  -- setting that the caller sees once the procedure returns.
  start transaction;

  while i < max do
    if i % 2 = 0 then insert into tableA (val) values (i); end if;
    if i % 3 = 0 then insert into tableB (val) values (i); end if;
    set i = i + 1;
  end while;

  commit;

end proc_main #

delimiter ;

drop procedure if exists test_memory_tables_hash;

delimiter #

-- test_memory_tables_hash: benchmark of the anti-join on MEMORY tables
-- that carry a HASH index on val.
--   Copies tableA and tableB into temporary MEMORY tables, then returns up
--   to 64 of the largest values present in mem_tableA but absent from
--   mem_tableB, in descending order. The temporary tables are dropped
--   before returning.
create procedure test_memory_tables_hash()
proc_main:begin

  -- Clear leftovers from an earlier call that failed part-way, otherwise
  -- the create statements below would raise a table-already-exists error.
  drop temporary table if exists mem_tableA;
  drop temporary table if exists mem_tableB;

  -- Both tables are created empty and loaded exactly once below. Combining
  -- "create ... select" with a second "insert ... select" would load
  -- mem_tableA twice and, as the index is not unique, duplicate every row.
  create temporary table mem_tableA
  (
    val int unsigned not null, index using hash(val)
  )
  engine=memory;

  create temporary table mem_tableB
  (
    val int unsigned not null, index using hash(val)
  )
  engine=memory;

  insert into mem_tableA (val) select val from tableA;
  insert into mem_tableB (val) select val from tableB;

  -- Anti-join: rows of mem_tableA with no partner in mem_tableB.
  select
    a.val
  from
    mem_tableA a
  left join mem_tableB b on a.val = b.val
  where
    b.val is null
  order by
    a.val desc
  limit 64;

  drop temporary table if exists mem_tableA;
  drop temporary table if exists mem_tableB;

end proc_main #

delimiter ;

drop procedure if exists test_memory_tables_btree;

delimiter #

-- test_memory_tables_btree: benchmark of the anti-join on MEMORY tables
-- that carry a BTREE index on val.
--   Copies tableA and tableB into temporary MEMORY tables, then returns up
--   to 64 of the largest values present in mem_tableA but absent from
--   mem_tableB, in descending order. The temporary tables are dropped
--   before returning.
create procedure test_memory_tables_btree()
proc_main:begin

  -- Clear leftovers from an earlier call that failed part-way, otherwise
  -- the create statements below would raise a table-already-exists error.
  drop temporary table if exists mem_tableA;
  drop temporary table if exists mem_tableB;

  -- Both tables are created empty and loaded exactly once below. Combining
  -- "create ... select" with a second "insert ... select" would load
  -- mem_tableA twice and, as the index is not unique, duplicate every row.
  create temporary table mem_tableA
  (
    val int unsigned not null, index using btree(val)
  )
  engine=memory;

  create temporary table mem_tableB
  (
    val int unsigned not null, index using btree(val)
  )
  engine=memory;

  insert into mem_tableA (val) select val from tableA;
  insert into mem_tableB (val) select val from tableB;

  -- Anti-join: rows of mem_tableA with no partner in mem_tableB.
  select
    a.val
  from
    mem_tableA a
  left join mem_tableB b on a.val = b.val
  where
    b.val is null
  order by
    a.val desc
  limit 64;

  drop temporary table if exists mem_tableA;
  drop temporary table if exists mem_tableB;

end proc_main #

delimiter ;


-- Benchmark driver: reload the base tables, then run each index variant
-- twice. The comment under each call is the timing the author recorded
-- (presumably elapsed time as h:mm:ss.mmm - confirm against the client used).
call load_test_data();

call test_memory_tables_hash();
-- 0:00:00.597
call test_memory_tables_hash();
-- 0:00:00.362


-- Reload the same data before timing the btree variant.
call load_test_data();

call test_memory_tables_btree();
-- 0:00:00.460
call test_memory_tables_btree();
-- 0:00:00.429
f00