[llvm] r300957 - X86 memcpy: use REPMOVSB instead of REPMOVS{Q, D, W} for inline copies
Clement Courbet via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 21 02:20:40 PDT 2017
Author: courbet
Date: Fri Apr 21 04:20:39 2017
New Revision: 300957
URL: http://llvm.org/viewvc/llvm-project?rev=300957&view=rev
Log:
X86 memcpy: use REPMOVSB instead of REPMOVS{Q,D,W} for inline copies
when the subtarget has fast strings.
This has two advantages:
- Speed is improved. For example, on Haswell throughput improvements increase
linearly with size from 256 to 512 bytes, after which they plateau:
(e.g. 1% for 260 bytes, 25% for 400 bytes, 40% for 508 bytes).
- Code is much smaller (no need to handle boundaries).
Added:
llvm/trunk/test/CodeGen/X86/memcpy-struct-by-value.ll
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=300957&r1=300956&r2=300957&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Fri Apr 21 04:20:39 2017
@@ -273,6 +273,13 @@ def FeatureFastSHLDRotate
"fast-shld-rotate", "HasFastSHLDRotate", "true",
"SHLD can be used as a faster rotate">;
+// String operations (e.g. REP MOVS) are fast. See "REP String Enhancement" in
+// the Intel Software Development Manual.
+def FeatureFastString
+ : SubtargetFeature<
+ "fast-string", "HasFastString", "true",
+ "REP MOVS/STOS are fast">;
+
//===----------------------------------------------------------------------===//
// X86 processors supported.
//===----------------------------------------------------------------------===//
@@ -498,6 +505,7 @@ def HSWFeatures : ProcessorFeatures<IVBF
FeatureAVX2,
FeatureBMI,
FeatureBMI2,
+ FeatureFastString,
FeatureFMA,
FeatureLZCNT,
FeatureMOVBE,
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=300957&r1=300956&r2=300957&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Fri Apr 21 04:20:39 2017
@@ -897,6 +897,7 @@ def NotSlowIncDec : Predicate<"!Subtarge
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
+def HasFastString : Predicate<"Subtarget->hasFastString()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp?rev=300957&r1=300956&r2=300957&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86SelectionDAGInfo.cpp Fri Apr 21 04:20:39 2017
@@ -215,7 +215,12 @@ SDValue X86SelectionDAGInfo::EmitTargetC
return SDValue();
MVT AVT;
- if (Align & 1)
+ if (Subtarget.hasFastString())
+ // If the target has fast strings, then it's at least as fast to use
+ // REP MOVSB instead of REP MOVS{W,D,Q}, and it avoids having to handle
+ // BytesLeft.
+ AVT = MVT::i8;
+ else if (Align & 1)
AVT = MVT::i8;
else if (Align & 2)
AVT = MVT::i16;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=300957&r1=300956&r2=300957&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Apr 21 04:20:39 2017
@@ -303,6 +303,7 @@ void X86Subtarget::initializeEnvironment
HasFastVectorFSQRT = false;
HasFastLZCNT = false;
HasFastSHLDRotate = false;
+ HasFastString = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=300957&r1=300956&r2=300957&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Fri Apr 21 04:20:39 2017
@@ -232,6 +232,9 @@ protected:
/// True if SHLD based rotate is fast.
bool HasFastSHLDRotate;
+ /// True if the processor has fast REP MOVS.
+ bool HasFastString;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@@ -472,6 +475,7 @@ public:
bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
+ bool hasFastString() const { return HasFastString; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
Added: llvm/trunk/test/CodeGen/X86/memcpy-struct-by-value.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memcpy-struct-by-value.ll?rev=300957&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memcpy-struct-by-value.ll (added)
+++ llvm/trunk/test/CodeGen/X86/memcpy-struct-by-value.ll Fri Apr 21 04:20:39 2017
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=-fast-string < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=NOFAST
+; RUN: llc -mtriple=x86_64-linux-gnu -mattr=+fast-string < %s -o - | FileCheck %s --check-prefix=ALL --check-prefix=FAST
+
+%struct.large = type { [4096 x i8] }
+
+declare void @foo(%struct.large* align 8 byval) nounwind
+
+define void @test1(%struct.large* nocapture %x) nounwind {
+ call void @foo(%struct.large* align 8 byval %x)
+ ret void
+
+; ALL-LABEL: test1:
+; NOFAST: rep;movsq
+; FAST: rep;movsb
+}
More information about the llvm-commits
mailing list