[llvm-commits] [llvm] r153230 - in /llvm/trunk: lib/CodeGen/SelectionDAG/FastISel.cpp test/CodeGen/ARM/fast-isel.ll

Chad Rosier mcrosier at apple.com
Wed Mar 21 17:21:18 PDT 2012


Author: mcrosier
Date: Wed Mar 21 19:21:17 2012
New Revision: 153230

URL: http://llvm.org/viewvc/llvm-project?rev=153230&view=rev
Log:
[fast-isel] Fold "urem x, pow2" -> "and x, pow2-1".  This should fix the 271%
execution-time regression for nsieve-bits on the ARMv7 -O0 -g nightly tester.
This may also improve compile-time on architectures that would otherwise 
generate a libcall for urem (e.g., ARM) or fall back to the DAG selector.
rdar://10810716

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
    llvm/trunk/test/CodeGen/ARM/fast-isel.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp?rev=153230&r1=153229&r2=153230&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/FastISel.cpp Wed Mar 21 19:21:17 2012
@@ -395,6 +395,13 @@
       ISDOpcode = ISD::SRA;
     }
 
+    // Transform "urem x, pow2" -> "and x, pow2-1".
+    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+        isPowerOf2_64(Imm)) {
+      --Imm;
+      ISDOpcode = ISD::AND;
+    }
+
     unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
                                       Op0IsKill, Imm, VT.getSimpleVT());
     if (ResultReg == 0) return false;

Modified: llvm/trunk/test/CodeGen/ARM/fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fast-isel.ll?rev=153230&r1=153229&r2=153230&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fast-isel.ll Wed Mar 21 19:21:17 2012
@@ -217,3 +217,12 @@
 ; THUMB: vcmpe.f32 s0, #0
   ret i1 %4
 }
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
+}





More information about the llvm-commits mailing list