[llvm] r323015 - [X86] Add support for passing 'prefer-vector-width' function attribute into X86Subtarget and exposing via X86's getRegisterWidth TTI interface.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 19 16:26:09 PST 2018
Author: ctopper
Date: Fri Jan 19 16:26:08 2018
New Revision: 323015
URL: http://llvm.org/viewvc/llvm-project?rev=323015&view=rev
Log:
[X86] Add support for passing 'prefer-vector-width' function attribute into X86Subtarget and exposing via X86's getRegisterWidth TTI interface.
This will cause the vectorizers to do some limiting of the vector widths they create. This is not a strict limit. There are reasons I know of that the loop vectorizer will generate larger vectors for.
I've written this in such a way that the interface will only return a properly supported width(0/128/256/512) even if the attribute says something funny like 384 or 10.
This has been split from D41895 with the remainder in a follow up commit.
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Transforms/LoopVectorize/X86/avx512.ll
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=323015&r1=323014&r2=323015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Fri Jan 19 16:26:08 2018
@@ -334,6 +334,10 @@ def FeatureHasFastGather
: SubtargetFeature<"fast-gather", "HasFastGather", "true",
"Indicates if gather is reasonably fast.">;
+def FeaturePrefer256Bit
+ : SubtargetFeature<"prefer-256-bit", "Prefer256Bit", "true",
+ "Prefer 256-bit AVX instructions">;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=323015&r1=323014&r2=323015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Fri Jan 19 16:26:08 2018
@@ -254,6 +254,12 @@ void X86Subtarget::initSubtargetFeatures
GatherOverhead = 2;
if (hasAVX512())
ScatterOverhead = 2;
+
+ // Consume the vector width attribute or apply any target specific limit.
+ if (PreferVectorWidthOverride)
+ PreferVectorWidth = PreferVectorWidthOverride;
+ else if (Prefer256Bit)
+ PreferVectorWidth = 256;
}
void X86Subtarget::initializeEnvironment() {
@@ -347,6 +353,8 @@ void X86Subtarget::initializeEnvironment
X86ProcFamily = Others;
GatherOverhead = 1024;
ScatterOverhead = 1024;
+ PreferVectorWidth = UINT32_MAX;
+ Prefer256Bit = false;
}
X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU,
@@ -358,10 +366,12 @@ X86Subtarget &X86Subtarget::initializeSu
X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM,
- unsigned StackAlignOverride)
+ unsigned StackAlignOverride,
+ unsigned PreferVectorWidthOverride)
: X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others),
PICStyle(PICStyles::None), TM(TM), TargetTriple(TT),
StackAlignOverride(StackAlignOverride),
+ PreferVectorWidthOverride(PreferVectorWidthOverride),
In64BitMode(TargetTriple.getArch() == Triple::x86_64),
In32BitMode(TargetTriple.getArch() == Triple::x86 &&
TargetTriple.getEnvironment() != Triple::CODE16),
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=323015&r1=323014&r2=323015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Fri Jan 19 16:26:08 2018
@@ -359,6 +359,9 @@ protected:
///
unsigned MaxInlineSizeThreshold;
+ /// Indicates target prefers 256 bit instructions.
+ bool Prefer256Bit;
+
/// What processor and OS we're targeting.
Triple TargetTriple;
@@ -375,6 +378,13 @@ private:
/// Override the stack alignment.
unsigned StackAlignOverride;
+ /// Preferred vector width from function attribute.
+ unsigned PreferVectorWidthOverride;
+
+ /// Resolved preferred vector width from function attribute and subtarget
+ /// features.
+ unsigned PreferVectorWidth;
+
/// True if compiling for 64-bit, false for 16-bit or 32-bit.
bool In64BitMode;
@@ -400,7 +410,8 @@ public:
/// of the specified triple.
///
X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
- const X86TargetMachine &TM, unsigned StackAlignOverride);
+ const X86TargetMachine &TM, unsigned StackAlignOverride,
+ unsigned PreferVectorWidthOverride);
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
@@ -584,6 +595,8 @@ public:
bool hasCLWB() const { return HasCLWB; }
bool hasRDPID() const { return HasRDPID; }
+ unsigned getPreferVectorWidth() const { return PreferVectorWidth; }
+
bool isXRaySupported() const override { return is64Bit(); }
X86ProcFamilyEnum getProcFamily() const { return X86ProcFamily; }
Modified: llvm/trunk/lib/Target/X86/X86TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetMachine.cpp?rev=323015&r1=323014&r2=323015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetMachine.cpp Fri Jan 19 16:26:08 2018
@@ -255,7 +255,24 @@ X86TargetMachine::getSubtargetImpl(const
if (SoftFloat)
Key += FS.empty() ? "+soft-float" : ",+soft-float";
- FS = Key.substr(CPU.size());
+ // Keep track of the key width after all features are added so we can extract
+ // the feature string out later.
+ unsigned CPUFSWidth = Key.size();
+
+ // Translate vector width function attribute into subtarget features. This
+ // overrides any CPU specific turning parameter
+ unsigned PreferVectorWidthOverride = 0;
+ if (F.hasFnAttribute("prefer-vector-width")) {
+ StringRef Val = F.getFnAttribute("prefer-vector-width").getValueAsString();
+ unsigned Width;
+ if (!Val.getAsInteger(0, Width)) {
+ Key += ",prefer-vector-width=";
+ Key += Val;
+ PreferVectorWidthOverride = Width;
+ }
+ }
+
+ FS = Key.slice(CPU.size(), CPUFSWidth);
auto &I = SubtargetMap[Key];
if (!I) {
@@ -264,7 +281,8 @@ X86TargetMachine::getSubtargetImpl(const
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
- Options.StackAlignmentOverride);
+ Options.StackAlignmentOverride,
+ PreferVectorWidthOverride);
}
return I.get();
}
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=323015&r1=323014&r2=323015&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Fri Jan 19 16:26:08 2018
@@ -130,12 +130,13 @@ unsigned X86TTIImpl::getNumberOfRegister
}
unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) const {
+ unsigned PreferVectorWidth = ST->getPreferVectorWidth();
if (Vector) {
- if (ST->hasAVX512())
+ if (ST->hasAVX512() && PreferVectorWidth >= 512)
return 512;
- if (ST->hasAVX())
+ if (ST->hasAVX() && PreferVectorWidth >= 256)
return 256;
- if (ST->hasSSE1())
+ if (ST->hasSSE1() && PreferVectorWidth >= 128)
return 128;
return 0;
}
@@ -2523,7 +2524,7 @@ bool X86TTIImpl::isLegalMaskedGather(Typ
// TODO: Remove the explicit ST->hasAVX512()?, That would mean we would only
// enable gather with a -march.
return (DataWidth == 32 || DataWidth == 64) &&
- (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
+ (ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()));
}
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/avx512.ll?rev=323015&r1=323014&r2=323015&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/avx512.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/avx512.ll Fri Jan 19 16:26:08 2018
@@ -1,4 +1,5 @@
; RUN: opt -mattr=+avx512f --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
+; RUN: opt -mattr=+avx512vl,+prefer-256-bit --loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
@@ -10,10 +11,86 @@ target triple = "x86_64-apple-macosx10.9
; CHECK: vmovdqu64 %zmm{{.}},
; CHECK-NOT: %ymm
+; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to
+
+; CHECK-PREFER-AVX256-LABEL: f:
+; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
+; CHECK-PREFER-AVX256-NOT: %zmm
+
define void @f(i32* %a, i32 %n) {
entry:
%cmp4 = icmp sgt i32 %n, 0
br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %n, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit
+; vectors
+
+; CHECK-LABEL: g:
+; CHECK: vmovdqu %ymm{{.}},
+; CHECK-NOT: %zmm
+
+; CHECK-PREFER-AVX256-LABEL: g:
+; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
+; CHECK-PREFER-AVX256-NOT: %zmm
+
+define void @g(i32* %a, i32 %n) "prefer-vector-width"="256" {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ store i32 %n, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Verify that the "prefer-vector-width=512" attribute override the subtarget
+; vectors
+
+; CHECK-LABEL: h:
+; CHECK: vmovdqu64 %zmm{{.}},
+; CHECK-NOT: %ymm
+
+; CHECK-PREFER-AVX256-LABEL: h:
+; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
+; CHECK-PREFER-AVX256-NOT: %ymm
+
+define void @h(i32* %a, i32 %n) "prefer-vector-width"="512" {
+entry:
+ %cmp4 = icmp sgt i32 %n, 0
+ br i1 %cmp4, label %for.body.preheader, label %for.end
for.body.preheader: ; preds = %entry
br label %for.body
More information about the llvm-commits
mailing list