I just wanted to inform you, that apparently GCC can do this requested hoisting in the optimizer. Here's a model loop (in C):
struct C
{
int (*calculate)();
};
void sideeffect1();
void sideeffect2();
void sideeffect3();
void foo(struct C *ptr)
{
int i;
for (i=0;i<1000;i++)
{
sideeffect1();
if (ptr == 0 || ptr->calculate()>5) sideeffect2();
sideeffect3();
}
}
Compiling this with gcc 4.5 and -O3 gives:
.globl foo
.type foo, @function
foo:
.LFB0:
pushq %rbp
.LCFI0:
movq %rdi, %rbp
pushq %rbx
.LCFI1:
subq $8, %rsp
.LCFI2:
testq %rdi, %rdi # ptr==0? -> .L2, see below
je .L2
movl $1000, %ebx
.p2align 4,,10
.p2align 3
.L4:
xorl %eax, %eax
call sideeffect1 # sideeffect1
xorl %eax, %eax
call *0(%rbp) # call p->calculate, no check for ptr==0
cmpl $5, %eax
jle .L3
xorl %eax, %eax
call sideeffect2 # ok, call sideeffect2
.L3:
xorl %eax, %eax
call sideeffect3
subl $1, %ebx
jne .L4
addq $8, %rsp
.LCFI3:
xorl %eax, %eax
popq %rbx
.LCFI4:
popq %rbp
.LCFI5:
ret
.L2: # here's the loop with ptr==0
.LCFI6:
movl $1000, %ebx
.p2align 4,,10
.p2align 3
.L6:
xorl %eax, %eax
call sideeffect1 # does not try to call ptr->calculate() anymore
xorl %eax, %eax
call sideeffect2
xorl %eax, %eax
call sideeffect3
subl $1, %ebx
jne .L6
addq $8, %rsp
.LCFI7:
xorl %eax, %eax
popq %rbx
.LCFI8:
popq %rbp
.LCFI9:
ret
And so does clang 2.7 (-O3):
foo:
.Leh_func_begin1:
pushq %rbp
.Llabel1:
movq %rsp, %rbp
.Llabel2:
pushq %r14
pushq %rbx
.Llabel3:
testq %rdi, %rdi # ptr==NULL -> .LBB1_5
je .LBB1_5
movq %rdi, %rbx
movl $1000, %r14d
.align 16, 0x90
.LBB1_2:
xorb %al, %al # here's the loop with the ptr->calculate check()
callq sideeffect1
xorb %al, %al
callq *(%rbx)
cmpl $6, %eax
jl .LBB1_4
xorb %al, %al
callq sideeffect2
.LBB1_4:
xorb %al, %al
callq sideeffect3
decl %r14d
jne .LBB1_2
jmp .LBB1_7
.LBB1_5:
movl $1000, %r14d
.align 16, 0x90
.LBB1_6:
xorb %al, %al # and here's the loop for the ptr==NULL case
callq sideeffect1
xorb %al, %al
callq sideeffect2
xorb %al, %al
callq sideeffect3
decl %r14d
jne .LBB1_6
.LBB1_7:
popq %rbx
popq %r14
popq %rbp
ret