[llvm] r270036 - [X86] Enable RRL part of the LEA optimization pass for -O2.

Thu May 19 03:18:35 PDT 2016

Author: aturetsk
Date: Thu May 19 05:18:29 2016
New Revision: 270036

URL: http://llvm.org/viewvc/llvm-project?rev=270036&view=rev
Log:
[X86] Enable RRL part of the LEA optimization pass for -O2.

Enable "Remove Redundant LEAs" part of the LEA optimization pass for -O2.
This gives a 6.4% performance improvement on Broadwell on the nnet benchmark from Coremark-pro.
There is no significant effect on other benchmarks (Geekbench, Spec2000, Spec2006).

Differential Revision: http://reviews.llvm.org/D19659

Modified:
    llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
    llvm/trunk/test/CodeGen/X86/lea-opt.ll

Modified: llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp?rev=270036&r1=270035&r2=270036&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp Thu May 19 05:18:29 2016
@@ -8,7 +8,7 @@
 //===----------------------------------------------------------------------===//
 //
 // This file defines the pass that performs some optimizations with LEA
-// instructions in order to improve code size.
+// instructions in order to improve performance and code size.
 // Currently, it does two things:
 // 1) If there are two LEA instructions calculating addresses which only differ
 //    by displacement inside a basic block, one of them is removed.
@@ -614,9 +614,7 @@ bool OptimizeLEAPass::removeRedundantLEA
 bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
   bool Changed = false;
 
-  // Perform this optimization only if we care about code size.
-  if (DisableX86LEAOpt || skipFunction(*MF.getFunction()) ||
-      !MF.getFunction()->optForSize())
+  if (DisableX86LEAOpt || skipFunction(*MF.getFunction()))
     return false;
 
   MRI = &MF.getRegInfo();
@@ -635,13 +633,13 @@ bool OptimizeLEAPass::runOnMachineFuncti
     if (LEAs.empty())
       continue;
 
-    // Remove redundant LEA instructions. The optimization may have a negative
-    // effect on performance, so do it only for -Oz.
-    if (MF.getFunction()->optForMinSize())
-      Changed |= removeRedundantLEAs(LEAs);
+    // Remove redundant LEA instructions.
+    Changed |= removeRedundantLEAs(LEAs);
 
-    // Remove redundant address calculations.
-    Changed |= removeRedundantAddrCalc(LEAs);
+    // Remove redundant address calculations. Do it only for -Os/-Oz since only
+    // a code size gain is expected from this part of the pass.
+    if (MF.getFunction()->optForSize())
+      Changed |= removeRedundantAddrCalc(LEAs);
   }
 
   return Changed;

Modified: llvm/trunk/test/CodeGen/X86/lea-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt.ll?rev=270036&r1=270035&r2=270036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea-opt.ll Thu May 19 05:18:29 2016
@@ -1,4 +1,5 @@
-; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=ENABLED
+; RUN: llc --disable-x86-lea-opt < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK -check-prefix=DISABLED
 
 %struct.anon1 = type { i32, i32, i32 }
 %struct.anon2 = type { i32, [32 x i32], i32 }
@@ -38,12 +39,14 @@ sw.epilog:
 ; CHECK:	movl arr1([[REG1]],[[REG1]],2), {{.*}}
 ; CHECK:	leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]]
 ; CHECK:	subl arr1+4([[REG1]],[[REG1]],2), {{.*}}
-; CHECK:	leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
+; DISABLED:	leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
 ; CHECK:	addl arr1+8([[REG1]],[[REG1]],2), {{.*}}
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, ([[REG3]])
+; ENABLED:	movl ${{[1-4]+}}, 4([[REG2]])
+; DISABLED:	movl ${{[1-4]+}}, ([[REG3]])
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, ([[REG3]])
+; ENABLED:	movl ${{[1-4]+}}, 4([[REG2]])
+; DISABLED:	movl ${{[1-4]+}}, ([[REG3]])
 }
 
 define void @test2(i64 %x) nounwind optsize {
@@ -75,15 +78,20 @@ sw.epilog:
   ret void
 ; CHECK-LABEL: test2:
 ; CHECK:	shlq $2, [[REG1:%[a-z]+]]
+; DISABLED:	movl arr1([[REG1]],[[REG1]],2), {{.*}}
 ; CHECK:	leaq arr1+4([[REG1]],[[REG1]],2), [[REG2:%[a-z]+]]
-; CHECK:	movl -4([[REG2]]), {{.*}}
-; CHECK:	subl ([[REG2]]), {{.*}}
-; CHECK:	leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
-; CHECK:	addl ([[REG3]]), {{.*}}
+; ENABLED:	movl -4([[REG2]]), {{.*}}
+; ENABLED:	subl ([[REG2]]), {{.*}}
+; ENABLED:	addl 4([[REG2]]), {{.*}}
+; DISABLED:	subl arr1+4([[REG1]],[[REG1]],2), {{.*}}
+; DISABLED:	leaq arr1+8([[REG1]],[[REG1]],2), [[REG3:%[a-z]+]]
+; DISABLED:	addl arr1+8([[REG1]],[[REG1]],2), {{.*}}
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, ([[REG3]])
+; ENABLED:	movl ${{[1-4]+}}, 4([[REG2]])
+; DISABLED:	movl ${{[1-4]+}}, ([[REG3]])
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, ([[REG3]])
+; ENABLED:	movl ${{[1-4]+}}, 4([[REG2]])
+; DISABLED:	movl ${{[1-4]+}}, ([[REG3]])
 }
 
 ; Check that LEA optimization pass takes into account a resultant address
@@ -109,7 +117,9 @@ sw.bb.1:
 
 sw.bb.2:                                          ; preds = %entry
   store i32 333, i32* %a, align 4
-  store i32 444, i32* %b, align 4
+  ; Make sure the REG3's definition LEA won't be removed as redundant.
+  %cvt = ptrtoint i32* %b to i32
+  store i32 %cvt, i32* %b, align 4
   br label %sw.epilog
 
 sw.epilog:                                        ; preds = %sw.bb.2, %sw.bb.1, %entry
@@ -122,12 +132,14 @@ sw.epilog:
 ; REG3's definition is closer to movl than REG2's, but the pass still chooses
 ; REG2 because it provides the resultant address displacement fitting 1 byte.
 
-; CHECK:	movl ([[REG2]]), {{.*}}
-; CHECK:	addl ([[REG3]]), {{.*}}
+; ENABLED:	movl ([[REG2]]), {{.*}}
+; ENABLED:	addl ([[REG3]]), {{.*}}
+; DISABLED:	movl arr2+132([[REG1]]), {{.*}}
+; DISABLED:	addl arr2([[REG1]]), {{.*}}
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
 ; CHECK:	movl ${{[1-4]+}}, ([[REG3]])
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, ([[REG3]])
+; CHECK:	movl {{.*}}, ([[REG3]])
 }
 
 define void @test4(i64 %x) nounwind minsize {
@@ -158,12 +170,19 @@ sw.bb.2:
 sw.epilog:                                        ; preds = %sw.bb.2, %sw.bb.1, %entry
   ret void
 ; CHECK-LABEL: test4:
-; CHECK:	leaq arr1+4({{.*}}), [[REG2:%[a-z]+]]
-; CHECK:	movl -4([[REG2]]), {{.*}}
-; CHECK:	subl ([[REG2]]), {{.*}}
-; CHECK:	addl 4([[REG2]]), {{.*}}
+; CHECK:	imulq {{.*}}, [[REG1:%[a-z]+]]
+; DISABLED:	movl arr1([[REG1]]), {{.*}}
+; CHECK:	leaq arr1+4([[REG1]]), [[REG2:%[a-z]+]]
+; ENABLED:	movl -4([[REG2]]), {{.*}}
+; ENABLED:	subl ([[REG2]]), {{.*}}
+; ENABLED:	addl 4([[REG2]]), {{.*}}
+; DISABLED:	subl arr1+4([[REG1]]), {{.*}}
+; DISABLED:	leaq arr1+8([[REG1]]), [[REG3:%[a-z]+]]
+; DISABLED:	addl arr1+8([[REG1]]), {{.*}}
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, 4([[REG2]])
+; ENABLED:	movl ${{[1-4]+}}, 4([[REG2]])
+; DISABLED:	movl ${{[1-4]+}}, ([[REG3]])
 ; CHECK:	movl ${{[1-4]+}}, ([[REG2]])
-; CHECK:	movl ${{[1-4]+}}, 4([[REG2]])
+; ENABLED:	movl ${{[1-4]+}}, 4([[REG2]])
+; DISABLED:	movl ${{[1-4]+}}, ([[REG3]])
 }