[llvm] r222521 - [X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive numbers
Alexey Volkov
avolkov.intel at gmail.com
Fri Nov 21 03:19:35 PST 2014
Author: volkalex
Date: Fri Nov 21 05:19:34 2014
New Revision: 222521
URL: http://llvm.org/viewvc/llvm-project?rev=222521&view=rev
Log:
[X86] For Silvermont CPU use 16-bit division instead of 64-bit for small positive numbers
Differential Revision: http://reviews.llvm.org/D5938
Added:
llvm/trunk/test/CodeGen/X86/slow-div.ll
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=222521&r1=222520&r2=222521&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Fri Nov 21 05:19:34 2014
@@ -167,9 +167,12 @@ def FeatureSMAP : SubtargetFeature<"s
"Support SMAP instructions">;
def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
"Use LEA for adjusting the stack pointer">;
-def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
- "HasSlowDivide", "true",
- "Use small divide for positive values less than 256">;
+def FeatureSlowDivide32 : SubtargetFeature<"idivl-to-divb",
+ "HasSlowDivide32", "true",
+ "Use 8-bit divide for positive values less than 256">;
+def FeatureSlowDivide64 : SubtargetFeature<"idivq-to-divw",
+ "HasSlowDivide64", "true",
+ "Use 16-bit divide for positive values less than 65536">;
def FeaturePadShortFunctions : SubtargetFeature<"pad-short-functions",
"PadShortFunctions", "true",
"Pad short functions">;
@@ -234,7 +237,7 @@ def : ProcessorModel<"penryn", SandyBrid
def : ProcessorModel<"atom", AtomModel,
[ProcIntelAtom, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
- FeatureSlowDivide,
+ FeatureSlowDivide32, FeatureSlowDivide64,
FeatureCallRegIndirect,
FeatureLEAUsesAG,
FeaturePadShortFunctions]>;
@@ -244,6 +247,7 @@ def : ProcessorModel<"slm", SLMModel, [
FeatureSSE42, FeatureCMPXCHG16B,
FeatureMOVBE, FeaturePOPCNT,
FeaturePCLMUL, FeatureAES,
+ FeatureSlowDivide64,
FeatureCallRegIndirect,
FeaturePRFCHW,
FeatureSlowLEA, FeatureSlowIncDec,
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=222521&r1=222520&r2=222521&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Nov 21 05:19:34 2014
@@ -249,9 +249,10 @@ void X86TargetLowering::resetOperationAc
setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
// Bypass expensive divides on Atom when compiling with O2
- if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default) {
- addBypassSlowDiv(32, 8);
- if (Subtarget->is64Bit())
+ if (TM.getOptLevel() >= CodeGenOpt::Default) {
+ if (Subtarget->hasSlowDivide32())
+ addBypassSlowDiv(32, 8);
+ if (Subtarget->hasSlowDivide64() && Subtarget->is64Bit())
addBypassSlowDiv(64, 16);
}
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=222521&r1=222520&r2=222521&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Nov 21 05:19:34 2014
@@ -267,7 +267,8 @@ void X86Subtarget::initializeEnvironment
HasVectorUAMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
- HasSlowDivide = false;
+ HasSlowDivide32 = false;
+ HasSlowDivide64 = false;
PadShortFunctions = false;
CallRegIndirect = false;
LEAUsesAG = false;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=222521&r1=222520&r2=222521&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Fri Nov 21 05:19:34 2014
@@ -171,9 +171,13 @@ protected:
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
- /// HasSlowDivide - True if smaller divides are significantly faster than
- /// full divides and should be used when possible.
- bool HasSlowDivide;
+ /// HasSlowDivide32 - True if 8-bit divisions are significantly faster than
+ /// 32-bit divisions and should be used when possible.
+ bool HasSlowDivide32;
+
+ /// HasSlowDivide64 - True if 16-bit divides are significantly faster than
+ /// 64-bit divisions and should be used when possible.
+ bool HasSlowDivide64;
/// PadShortFunctions - True if the short functions should be padded to prevent
/// a stall when returning too early.
@@ -373,7 +377,8 @@ public:
bool hasVectorUAMem() const { return HasVectorUAMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
- bool hasSlowDivide() const { return HasSlowDivide; }
+ bool hasSlowDivide32() const { return HasSlowDivide32; }
+ bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
bool callRegIndirect() const { return CallRegIndirect; }
bool LEAusesAG() const { return LEAUsesAG; }
Added: llvm/trunk/test/CodeGen/X86/slow-div.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/slow-div.ll?rev=222521&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/slow-div.ll (added)
+++ llvm/trunk/test/CodeGen/X86/slow-div.ll Fri Nov 21 05:19:34 2014
@@ -0,0 +1,28 @@
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivl-to-divb < %s | FileCheck -check-prefix=DIV32 %s
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+idivq-to-divw < %s | FileCheck -check-prefix=DIV64 %s
+
+define i32 @div32(i32 %a, i32 %b) {
+entry:
+; DIV32-LABEL: div32:
+; DIV32: orl %{{.*}}, [[REG:%[a-z]+]]
+; DIV32: testl $-256, [[REG]]
+; DIV32: divb
+; DIV64-LABEL: div32:
+; DIV64-NOT: divb
+ %div = sdiv i32 %a, %b
+ ret i32 %div
+}
+
+define i64 @div64(i64 %a, i64 %b) {
+entry:
+; DIV32-LABEL: div64:
+; DIV32-NOT: divw
+; DIV64-LABEL: div64:
+; DIV64: orq %{{.*}}, [[REG:%[a-z]+]]
+; DIV64: testq $-65536, [[REG]]
+; DIV64: divw
+ %div = sdiv i64 %a, %b
+ ret i64 %div
+}
+
+
More information about the llvm-commits mailing list