[llvm] r176442 - Bypass Slow Divides

Preston Gurd preston.gurd at intel.com
Mon Mar 4 10:13:57 PST 2013


Author: pgurd
Date: Mon Mar  4 12:13:57 2013
New Revision: 176442

URL: http://llvm.org/viewvc/llvm-project?rev=176442&view=rev
Log:
Bypass Slow Divides

* Only apply divide bypass optimization when not optimizing for size. 
* Fixed bug caused by constant for 0 value of type Int32,
  used dividend type to generate the constant instead.
* For atom x86-64 apply the divide bypass to use 16-bit divides instead of
  64-bit divides when operand values are small enough.
* Added lit tests for 64-bit divide bypass.

Patch by Tyler Nowicki!



Added:
    llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp
    llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp
    llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=176442&r1=176441&r2=176442&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Mar  4 12:13:57 2013
@@ -181,9 +181,12 @@ X86TargetLowering::X86TargetLowering(X86
     setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
 
-  // Bypass i32 with i8 on Atom when compiling with O2
-  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+  // Bypass expensive divides on Atom when compiling with O2
+  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
     addBypassSlowDiv(32, 8);
+    if (Subtarget->is64Bit())
+      addBypassSlowDiv(64, 16);
+  }
 
   if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
     // Setup Windows compiler runtime calls.

Modified: llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=176442&r1=176441&r2=176442&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp Mon Mar  4 12:13:57 2013
@@ -154,7 +154,7 @@ bool CodeGenPrepare::runOnFunction(Funct
 
   /// This optimization identifies DIV instructions that can be
   /// profitably bypassed and carried out with a shorter, faster divide.
-  if (TLI && TLI->isSlowDivBypassed()) {
+  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
     const DenseMap<unsigned int, unsigned int> &BypassWidths =
        TLI->getBypassSlowDivWidths();
     for (Function::iterator I = F.begin(); I != F.end(); I++)

Modified: llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp?rev=176442&r1=176441&r2=176442&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/BypassSlowDivision.cpp Mon Mar  4 12:13:57 2013
@@ -163,7 +163,7 @@ static bool insertFastDiv(Function &F,
   Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
 
   // Compare operand values and branch
-  Value *ZeroV = MainBuilder.getInt32(0);
+  Value *ZeroV = ConstantInt::getSigned(Dividend->getType(), 0);
   Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
   MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
 
@@ -244,7 +244,7 @@ bool llvm::bypassSlowDivision(Function &
 
     // Get bitwidth of div/rem instruction
     IntegerType *T = cast<IntegerType>(J->getType());
-    int bitwidth = T->getBitWidth();
+    unsigned int bitwidth = T->getBitWidth();
 
     // Continue if bitwidth is not bypassed
     DenseMap<unsigned int, unsigned int>::const_iterator BI = BypassWidths.find(bitwidth);

Added: llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll?rev=176442&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll (added)
+++ llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division-64.ll Mon Mar  4 12:13:57 2013
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux -march=x86-64 | FileCheck %s
+
+; Additional tests for 64-bit divide bypass
+
+define i64 @Test_get_quotient(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+  %result = sdiv i64 %a, %b
+  ret i64 %result
+}
+
+define i64 @Test_get_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: ret
+; CHECK: divw
+; CHECK: ret
+  %result = srem i64 %a, %b
+  ret i64 %result
+}
+
+define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
+; CHECK: Test_get_quotient_and_remainder:
+; CHECK: orq %rsi, %rcx
+; CHECK-NEXT: testq $-65536, %rcx
+; CHECK-NEXT: je
+; CHECK: idivq
+; CHECK: divw
+; CHECK: addq
+; CHECK: ret
+; CHECK-NOT: idivq
+; CHECK-NOT: divw
+  %resultdiv = sdiv i64 %a, %b
+  %resultrem = srem i64 %a, %b
+  %result = add i64 %resultdiv, %resultrem
+  ret i64 %result
+}

Modified: llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll?rev=176442&r1=176441&r2=176442&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atom-bypass-slow-division.ll Mon Mar  4 12:13:57 2013
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mcpu=atom -mtriple=i686-linux  | FileCheck %s
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
 
 define i32 @Test_get_quotient(i32 %a, i32 %b) nounwind {
 ; CHECK: Test_get_quotient:





More information about the llvm-commits mailing list