tags:

views:

101

answers:

2

i have one table store post id and it's tags like :

Post_id |  Tags
--------------------------------------
1       |  keyword1,keyword2,keyword3

I want to loop though each row at this table and do :

  • put the keyword1,keyword2,keyword3 in new table :

    word_id    |  word_value
    -------------------------
       1       |  keyword1
       2       |  keyword2    
       3       |  keyword3
    
  • get mysql_insert_id() foreach (or exist word_id if word_value already there) and then put in the new table like :

    post_id |  word_id
    ------------------
    1       |   1    
    1       |   2    
    1       |   3
    

I've using php and mysql for do that task but this is slowly. Anyone have good idea?

A: 

do something like this:

-- TABLES

drop table if exists post_tags;
create table post_tags
(
post_id int unsigned not null auto_increment primary key,
tags_csv varchar(1024) not null
)
engine=innodb;

drop table if exists keywords;
create table keywords
(
keyword_id mediumint unsigned not null auto_increment primary key,
name varchar(255) unique not null
)
engine=innodb;


-- optimised for queries such as - select all posts that have keyword 3

drop table if exists post_keywords;
create table post_keywords
(
keyword_id mediumint unsigned not null,
post_id int unsigned not null,
primary key (keyword_id, post_id), -- clustered composite PK !
key (post_id)
)
engine=innodb;

-- STORED PROCEDURES


drop procedure if exists normalise_post_tags;

delimiter #

create procedure normalise_post_tags()
proc_main:begin

declare v_cursor_done tinyint unsigned default 0;

-- watch out for variable names that have the same names as fields !!

declare v_post_id int unsigned;
declare v_tags_csv varchar(1024);
declare v_keyword varchar(255);

declare v_keyword_id mediumint unsigned;

declare v_tags_done tinyint unsigned;
declare v_tags_idx int unsigned;

declare v_cursor cursor for select post_id, tags_csv from post_tags order by post_id;
declare continue handler for not found set v_cursor_done = 1;

set autocommit = 0; 

open v_cursor;
repeat

  fetch v_cursor into v_post_id, v_tags_csv;

  -- split the out the v_tags_csv and insert

  set v_tags_done = 0;       
  set v_tags_idx = 1;

  while not v_tags_done do

    set v_keyword = substring(v_tags_csv, v_tags_idx, 
      if(locate(',', v_tags_csv, v_tags_idx) > 0, 
        locate(',', v_tags_csv, v_tags_idx) - v_tags_idx, 
        length(v_tags_csv)));

      set v_keyword = trim(v_keyword);

      if length(v_keyword) > 0 then

        set v_tags_idx = v_tags_idx + length(v_keyword) + 1;

        -- add the keyword if it doesnt already exist
        insert ignore into keywords (name) values (v_keyword);

        select keyword_id into v_keyword_id from keywords where name = v_keyword;

        -- add the post_keywords
        insert ignore into post_keywords (keyword_id, post_id) values (v_keyword_id, v_post_id);

      else
        set v_tags_done = 1;
      end if;

  end while;

until v_cursor_done end repeat;

close v_cursor;

commit;

end proc_main #


delimiter ;


-- TEST DATA

insert into post_tags (tags_csv) values 
('keyword1,keyword2,keyword3'),
('keyword1,keyword5'),
('keyword4,keyword3,keyword6,keyword1');

-- TESTING

call normalise_post_tags();

select * from post_tags order by post_id;
select * from keywords order by keyword_id;
select * from post_keywords order by keyword_id, post_id;
f00
A: 

for each keyword, do

insert into newtable (id, keyword) select id, 'aKeyword' from oldtable where oldtable.keywords like '%aKeyword%'

if oldtable.keywords is just a VARCHAR, or

insert into newtable (id, keyword) select id, 'aKeyword' from oldtable where FIND_IN_SET('aKeyword',keywords)>0

if it's a SET type.

bemace