COMPOSITE PK APPROACH
drop table if exists users;
create table users(
user_id int unsigned not null auto_increment primary key, -- clustered auto_inc PK
username varbinary(32) not null,
unique key users_username_idx(username)
)engine=innodb;
insert into users (username) values ('f00'),('bar'),('bish'),('bash'),('bosh'),('F00');
drop table if exists user_friends;
create table user_friends(
user_id int unsigned not null,
friend_user_id int unsigned not null,
primary key (user_id, friend_user_id) -- clustered composite PK
)engine=innodb;
insert into user_friends values
(1,2),(1,3), (2,1),(2,5), (3,5), (4,1),(4,2),(4,3),(4,5),(4,6), (5,4),(5,1);
most of the time i query user_friends wanting to list all the friends for a given user
so my queries are taking advantage of the clustered primary key (user_id, friend_user_id)
select * from user_friends where user_id = 4; -- uses PK index
if i want to delete a friend i have a composite PK so i need to specify the user_id and
friend_user_id that i want to delete - both parts of the key need to be satisfied. This
has a habit of making your application logic a little more complicated.
delete from user_friends where user_id = 4 and user_friend_id = 5; -- uses PK index
AUTO INC PK APPROACH (user table remains the same)
drop table if exists user_friends;
create table user_friends(
friend_id int unsigned not null auto_increment primary key, -- clustered auto_inc PK
user_id int unsigned not null,
friend_user_id int unsigned not null,
unique key user_friends_idx (user_id, friend_user_id) -- non clustered secondary index
)engine=innodb;
most of the time i query user_friends wanting to list all the friends for a given user
in this case i can't take advantage of the primary key and i need to create a secondary
index on (user_id, friend_user_id). Now i have 2 indexes on this table the PK index
enforcing entity integrity and a secondary index which helps optimize my queries.
Adding the secondary index has a cost associated to it. inserts/deletes now have to
update 2 indexes vs. just one composite key index and it takes up more disk space ofc.
select * from user_friends where user_id = 4; -- uses secondary index not PK
however, if i want to delete a friend i can use the friend_id PK this simplifies my
application logic as i can delete 1 row easily just using the PK.
delete from user_friends where friend_id = 10; -- uses PK index
This probably isnt a great example of the two types of approaches but it should give you some ideas into why clustered composite indexes are good to use at times :)