I tried to reconstruct the optimization steps of the binary search algorithm. I start with this iterative version:
int binsearch_1( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
int found=0;
while( size > 0 ){
size_t w=size/2;
if( p[w] < key ){ p+=w+1; size-=w+1; }
else if( p[w] > key ){ size =w; }
else /* p[w] == key */{ p+=w; found=1; break; }
}
*index=p-array; return found;
}
Eliminating comparisons from the loop:
int binsearch_2( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 0 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
*index=p-array; return p[0]==key;
}
Unrolling small sizes from the loop:
int binsearch_3( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==7 ){ if( p[4] <= key ){ p+=4; size=3; } else size=3; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==3 ){ if( p[2] <= key ){ p+=2; size=1; } else size=1; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
if( size==1 ){ if( p[1] <= key ){ p+=1; } }
*index=p-array; return p[0]==key;
}
Reordering if statements, moving special cases [size==pow(2,N)-1] to the end:
int binsearch_4( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
if( size==7 ){ if( p[4] <= key ){ p+=4; size=3; } else size=3; }
if( size==3 ){ if( p[2] <= key ){ p+=2; size=1; } else size=1; }
if( size==1 ){ if( p[1] <= key ){ p+=1; } }
*index=p-array; return p[0]==key;
}
Changing if statements to a switch statement:
int binsearch_5( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
while( size > 8 ){
size_t w=size/2;
if( p[w+1] <= key ){ p+=w+1; size-=w+1; } else size =w;
}
if( size==8 ){ if( p[5] <= key ){ p+=5; size=3; } else size=4; }
if( size==6 ){ if( p[4] <= key ){ p+=4; size=2; } else size=3; }
if( size==5 ){ if( p[3] <= key ){ p+=3; size=2; } else size=2; }
if( size==4 ){ if( p[3] <= key ){ p+=3; size=1; } else size=2; }
if( size==2 ){ if( p[2] <= key ){ p+=2; size=0; } else size=1; }
switch(size){
case 7: if( p[4] <= key ) p+=4;
case 3: if( p[2] <= key ) p+=2;
case 1: if( p[1] <= key ) p+=1;
}
*index=p-array; return p[0]==key;
}
Extending switch statement to handle all of the special cases:
int binsearch_6( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
switch( size ){
#define C(n) case ((size_t)1<<n)-1: if( p[1<<(n-1)]<=key ) p+=1<<(n-1);
#if SIZE_MAX == UINT64_MAX
C(63) C(62) C(61)
C(60) C(59) C(58) C(57) C(56) C(55) C(54) C(53) C(52) C(51)
C(50) C(49) C(48) C(47) C(46) C(45) C(44) C(43) C(42) C(41)
C(40) C(39) C(38) C(37) C(36) C(35) C(34) C(33) C(32)
#endif
C(31)
C(30) C(29) C(28) C(27) C(26) C(25) C(24) C(23) C(22) C(21)
C(20) C(19) C(18) C(17) C(16) C(15) C(14) C(13) C(12) C(11)
C(10) C( 9) C( 8) C( 7) C( 6) C( 5) C( 4) C( 3) C( 2) C( 1)
#undef C
break;
default:
while( size > 0 ){
size_t w=size/2;
if( p[w] < key ){ p+=w+1; size-=w+1; } else size=w;
}
}
*index=p-array; return p[0]==key;
}
Eliminating the general case handling from the code: [ w is the smallest number: w==pow(2,N)-1; size<=2*(w+1) ]
int binsearch_7( arr_t array[], size_t size, arr_t key, size_t *index ){
if( !array || !size ) return 0;
arr_t *p=array;
size_t w=(size-1)>>1; w|=w>>1; w|=w>>2; w|=w>>4; w|=w>>8; w|=w>>16;
#if SIZE_MAX == UINT64_MAX
w|=w>>32;
#endif
if( p[w]<key ) p+=size-w-1;
switch( w ){
#define C(n) case ((size_t)1<<n)-1: if( p[1<<(n-1)]<=key ) p+=1<<(n-1);
#if SIZE_MAX == UINT64_MAX
C(63) C(62) C(61)
C(60) C(59) C(58) C(57) C(56) C(55) C(54) C(53) C(52) C(51)
C(50) C(49) C(48) C(47) C(46) C(45) C(44) C(43) C(42) C(41)
C(40) C(39) C(38) C(37) C(36) C(35) C(34) C(33) C(32)
#endif
C(31)
C(30) C(29) C(28) C(27) C(26) C(25) C(24) C(23) C(22) C(21)
C(20) C(19) C(18) C(17) C(16) C(15) C(14) C(13) C(12) C(11)
C(10) C( 9) C( 8) C( 7) C( 6) C( 5) C( 4) C( 3) C( 2) C( 1)
#undef C
}
*index=p-array; return p[0]==key;
}
The last step what i made was the simplifying of the case labels [from: '((size_t)1<<n)-1' to: 'n'] but i found that the modified code was slower on my old PC than the previous version.