[llvm] 28ccc52 - [X86] Add feature for Fast Short REP MOV (FSRM) for Icelake or newer.
Hiroshi Yamauchi via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 19 13:39:55 PDT 2020
Author: Hiroshi Yamauchi
Date: 2020-08-19T13:39:42-07:00
New Revision: 28ccc52c4045f3d142e6f83d6f64d359b1c12808
URL: https://github.com/llvm/llvm-project/commit/28ccc52c4045f3d142e6f83d6f64d359b1c12808
DIFF: https://github.com/llvm/llvm-project/commit/28ccc52c4045f3d142e6f83d6f64d359b1c12808.diff
LOG: [X86] Add feature for Fast Short REP MOV (FSRM) for Icelake or newer.
Differential Revision: https://reviews.llvm.org/D85989
Added:
Modified:
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86InstrInfo.td
llvm/lib/Target/X86/X86Subtarget.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6debd3a0d3a9..bb52bd6128ad 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -372,6 +372,12 @@ def FeatureERMSB
"ermsb", "HasERMSB", "true",
"REP MOVS/STOS are fast">;
+// Icelake and newer processors have Fast Short REP MOV.
+def FeatureFSRM
+ : SubtargetFeature<
+ "fsrm", "HasFSRM", "true",
+ "REP MOVSB of short lengths is faster">;
+
// Bulldozer and newer processors can merge CMP/TEST (but not other
// instructions) with conditional branches.
def FeatureBranchFusion
@@ -713,7 +719,8 @@ def ProcessorFeatures {
FeatureVPOPCNTDQ,
FeatureGFNI,
FeatureCLWB,
- FeatureRDPID];
+ FeatureRDPID,
+ FeatureFSRM];
list<SubtargetFeature> ICLTuning = CNLTuning;
list<SubtargetFeature> ICLFeatures =
!listconcat(CNLFeatures, ICLAdditionalFeatures);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 3ea0ae8a8840..8d2178066d4f 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -1016,6 +1016,7 @@ def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
def HasERMSB : Predicate<"Subtarget->hasERMSB()">;
+def HasFSRM : Predicate<"Subtarget->hasFSRM()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
def UseIndirectThunkCalls : Predicate<"Subtarget->useIndirectThunkCalls()">;
def NotUseIndirectThunkCalls : Predicate<"!Subtarget->useIndirectThunkCalls()">;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 7b48e27f70e6..923f8105870f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -302,6 +302,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// True if the processor has enhanced REP MOVSB/STOSB.
bool HasERMSB = false;
+ /// True if the processor has fast short REP MOV.
+ bool HasFSRM = false;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions = false;
@@ -694,6 +697,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }
+ bool hasFSRM() const { return HasFSRM; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
More information about the llvm-commits mailing list