[llvm-dev] [RFC] New pass: LoopExitValues
Steve King via llvm-dev
llvm-dev at lists.llvm.org
Tue Sep 1 11:06:53 PDT 2015
On Mon, Aug 31, 2015 at 5:52 PM, Jake VanAdrighem
<jvanadrighem at gmail.com> wrote:
> Do you have some specific performance measurements?
Averaging 4 runs of 10000 iterations each of Coremark on my X86_64
desktop showed:
-O2 performance: 2.9% faster with the L.E.V. pass
-Os size: 1.5% smaller with the L.E.V. pass
In the case of Coremark, the benefit comes mainly from the matrix
portion of the benchmark, which uses nested loops. Similarly, I used a
matrix multiplication for the regression test, as shown below. The
L.E.V. pass eliminated 4 instructions.
/* Multiplies each of the Size * Size elements of Src by Val and stores the
   product at the same position in Dst. */
void matrix_mul(unsigned int Size, unsigned int *Dst, unsigned int
*Src, unsigned int Val) {
for (int Outer = 0; Outer < Size; ++Outer)
for (int Inner = 0; Inner < Size; ++Inner)
/* Flat index: Outer * Size is the start of the current row and Inner is
   the offset within it. Both loops recompute the same expression. */
Dst[Outer * Size + Inner] = Src[Outer * Size + Inner] * Val;
}
With LoopExitValues:
-------------------------------
# matrix_mul as compiled with the LoopExitValues pass enabled.
# Register roles, as used by the instructions below:
#   %edi  = loop bound (Size)          %ecx  = multiplier (Val)
#   %rdx  = base of the loads (Src)    %rsi  = base of the stores (Dst)
#   %r8d  = outer-loop counter         %r11d = inner-loop counter
#   %r9d  = flat element index; it is never reset between outer iterations
matrix_mul:
testl %edi, %edi # bound is zero: skip both loops
je .LBB0_5
xorl %r9d, %r9d # flat index = 0
xorl %r8d, %r8d # outer counter = 0
.LBB0_2: # outer loop head
xorl %r11d, %r11d # inner counter = 0
.LBB0_3: # inner loop body
movl %r9d, %r10d # 32-bit move zero-extends the index for use in addressing
movl (%rdx,%r10,4), %eax # eax = load base[index]
imull %ecx, %eax # eax *= multiplier
movl %eax, (%rsi,%r10,4) # store base[index] = eax
incl %r11d
incl %r9d # index keeps running; its value at inner-loop exit is the next row's start
cmpl %r11d, %edi
jne .LBB0_3 # repeat until inner counter == bound
incl %r8d
cmpl %edi, %r8d
jne .LBB0_2 # repeat until outer counter == bound
.LBB0_5:
retq
Without LoopExitValues:
-----------------------------------
# matrix_mul as compiled without the LoopExitValues pass.
# Register roles, as used by the instructions below:
#   %edi  = loop bound (Size)          %ecx  = multiplier (Val)
#   %rdx  = base of the loads (Src)    %rsi  = base of the stores (Dst)
#   %r9d  = outer-loop counter         %r10d = inner-loop counter
#   %r8d  = index of the current row's first element, advanced by the
#           bound once per outer iteration
#   %eax  = flat element index, re-seeded from %r8d at each outer iteration
#   %ebx  = scratch for the loaded/multiplied value (hence the push/pop)
matrix_mul:
pushq %rbx # Eliminated by L.E.V. pass
# The two temporary labels below are not referenced anywhere in this listing.
.Ltmp0:
.Ltmp1:
testl %edi, %edi # bound is zero: skip both loops
je .LBB0_5
xorl %r8d, %r8d # row-start index = 0
xorl %r9d, %r9d # outer counter = 0
.LBB0_2: # outer loop head
xorl %r10d, %r10d # inner counter = 0
movl %r8d, %eax # Eliminated by L.E.V. pass
.LBB0_3: # inner loop body
movl %eax, %r11d # 32-bit move zero-extends the index for use in addressing
movl (%rdx,%r11,4), %ebx # ebx = load base[index]
imull %ecx, %ebx # ebx *= multiplier
movl %ebx, (%rsi,%r11,4) # store base[index] = ebx
incl %r10d
incl %eax # advance flat index within the row
cmpl %r10d, %edi
jne .LBB0_3 # repeat until inner counter == bound
incl %r9d
addl %edi, %r8d # Eliminated by L.E.V. pass
cmpl %edi, %r9d
jne .LBB0_2 # repeat until outer counter == bound
.LBB0_5:
popq %rbx # Eliminated by L.E.V. pass
retq
More information about the llvm-dev
mailing list