[llvm] 4c4b718 - [AMDGPU] Propagate amdgpu-waves-per-eu to callees
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 14:57:58 PDT 2020
Author: Stanislav Mekhanoshin
Date: 2020-03-26T14:43:44-07:00
New Revision: 4c4b71843b6fc77b67cd98a3bfbbba67392bfc2e
URL: https://github.com/llvm/llvm-project/commit/4c4b71843b6fc77b67cd98a3bfbbba67392bfc2e
DIFF: https://github.com/llvm/llvm-project/commit/4c4b71843b6fc77b67cd98a3bfbbba67392bfc2e.diff
LOG: [AMDGPU] Propagate amdgpu-waves-per-eu to callees
Differential Revision: https://reviews.llvm.org/D76868
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 0ad4eebcf3f9..982aae374884 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -48,19 +48,62 @@ extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
namespace {
+// Target features to propagate.
+static constexpr const FeatureBitset TargetFeatures = {
+ AMDGPU::FeatureWavefrontSize16,
+ AMDGPU::FeatureWavefrontSize32,
+ AMDGPU::FeatureWavefrontSize64
+};
+
+// Attributes to propagate.
+static constexpr const char* AttributeNames[] = {
+ "amdgpu-waves-per-eu"
+};
+
+static constexpr unsigned NumAttr =
+ sizeof(AttributeNames) / sizeof(AttributeNames[0]);
+
class AMDGPUPropagateAttributes {
- const FeatureBitset TargetFeatures = {
- AMDGPU::FeatureWavefrontSize16,
- AMDGPU::FeatureWavefrontSize32,
- AMDGPU::FeatureWavefrontSize64
+
+ class FnProperties {
+ private:
+ explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
+
+ public:
+ explicit FnProperties(const TargetMachine &TM, const Function &F) {
+ Features = TM.getSubtargetImpl(F)->getFeatureBits();
+
+ for (unsigned I = 0; I < NumAttr; ++I)
+ if (F.hasFnAttribute(AttributeNames[I]))
+ Attributes[I] = F.getFnAttribute(AttributeNames[I]);
+ }
+
+ bool operator == (const FnProperties &Other) const {
+ if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
+ return false;
+ for (unsigned I = 0; I < NumAttr; ++I)
+ if (Attributes[I] != Other.Attributes[I])
+ return false;
+ return true;
+ }
+
+ FnProperties adjustToCaller(const FnProperties &CallerProps) const {
+ FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
+ for (unsigned I = 0; I < NumAttr; ++I)
+ New.Attributes[I] = CallerProps.Attributes[I];
+ return New;
+ }
+
+ FeatureBitset Features;
+ Optional<Attribute> Attributes[NumAttr];
};
- class Clone{
+ class Clone {
public:
- Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
- FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
+ Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
+ Properties(Props), OrigF(OrigF), NewF(NewF) {}
- FeatureBitset FeatureMask;
+ FnProperties Properties;
Function *OrigF;
Function *NewF;
};
@@ -77,17 +120,19 @@ class AMDGPUPropagateAttributes {
SmallVector<Clone, 32> Clones;
// Find a clone with required features.
- Function *findFunction(const FeatureBitset &FeaturesNeeded,
+ Function *findFunction(const FnProperties &PropsNeeded,
Function *OrigF);
- // Clone function F and set NewFeatures on the clone.
+ // Clone function \p F and set \p NewProps on the clone.
// Cole takes the name of original function.
- Function *cloneWithFeatures(Function &F,
- const FeatureBitset &NewFeatures);
+ Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
// Set new function's features in place.
void setFeatures(Function &F, const FeatureBitset &NewFeatures);
+ // Set new function's attributes in place.
+ void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
+
std::string getFeatureString(const FeatureBitset &Features) const;
// Propagate attributes from Roots.
@@ -155,11 +200,11 @@ INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
false, false)
Function *
-AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
+AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
Function *OrigF) {
// TODO: search for clone's clones.
for (Clone &C : Clones)
- if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
+ if (C.OrigF == OrigF && PropsNeeded == C.Properties)
return C.NewF;
return nullptr;
@@ -195,8 +240,7 @@ bool AMDGPUPropagateAttributes::process() {
if (F.isDeclaration())
continue;
- const FeatureBitset &CalleeBits =
- TM->getSubtargetImpl(F)->getFeatureBits();
+ const FnProperties CalleeProps(*TM, F);
SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
SmallSet<CallBase *, 32> Visited;
@@ -213,32 +257,31 @@ bool AMDGPUPropagateAttributes::process() {
if (!Roots.count(Caller) && !NewRoots.count(Caller))
continue;
- const FeatureBitset &CallerBits =
- TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
+ const FnProperties CallerProps(*TM, *Caller);
- if (CallerBits == (CalleeBits & TargetFeatures)) {
+ if (CalleeProps == CallerProps) {
if (!Roots.count(&F))
NewRoots.insert(&F);
continue;
}
- Function *NewF = findFunction(CallerBits, &F);
+ Function *NewF = findFunction(CallerProps, &F);
if (!NewF) {
- FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
- CallerBits);
+ const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
if (!AllowClone) {
// This may set different features on different iteartions if
// there is a contradiction in callers' attributes. In this case
// we rely on a second pass running on Module, which is allowed
// to clone.
- setFeatures(F, NewFeatures);
+ setFeatures(F, NewProps.Features);
+ setAttributes(F, NewProps.Attributes);
NewRoots.insert(&F);
Changed = true;
break;
}
- NewF = cloneWithFeatures(F, NewFeatures);
- Clones.push_back(Clone(CallerBits, &F, NewF));
+ NewF = cloneWithProperties(F, NewProps);
+ Clones.push_back(Clone(CallerProps, &F, NewF));
NewRoots.insert(NewF);
}
@@ -267,13 +310,14 @@ bool AMDGPUPropagateAttributes::process() {
}
Function *
-AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
- const FeatureBitset &NewFeatures) {
+AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
+ const FnProperties &NewProps) {
LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
ValueToValueMapTy dummy;
Function *NewF = CloneFunction(&F, dummy);
- setFeatures(*NewF, NewFeatures);
+ setFeatures(*NewF, NewProps.Features);
+ setAttributes(*NewF, NewProps.Attributes);
NewF->setVisibility(GlobalValue::DefaultVisibility);
NewF->setLinkage(GlobalValue::InternalLinkage);
@@ -300,6 +344,18 @@ void AMDGPUPropagateAttributes::setFeatures(Function &F,
F.addFnAttr("target-features", NewFeatureStr);
}
+void AMDGPUPropagateAttributes::setAttributes(Function &F,
+ const ArrayRef<Optional<Attribute>> NewAttrs) {
+ LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
+ for (unsigned I = 0; I < NumAttr; ++I) {
+ F.removeFnAttr(AttributeNames[I]);
+ if (NewAttrs[I]) {
+ LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
+ F.addFnAttr(*NewAttrs[I]);
+ }
+ }
+}
+
std::string
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
{
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
index 580fb31bc202..438445279b74 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
@@ -55,11 +55,11 @@
; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
-; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
+; OPT: attributes #0 = { {{.*}} "amdgpu-waves-per-eu"="1,1" "target-features"="+wavefrontsize64" }
; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
-; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
+; OPT: attributes #2 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="+wavefrontsize32" }
; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" }
-; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
+; OPT: attributes #4 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
; LLC: foo3:
; LLC: sample asm
@@ -94,7 +94,7 @@ entry:
ret void
}
-define void @foo3() #1 {
+define void @foo3() #4 {
entry:
call void asm sideeffect "; sample asm", ""()
ret void
@@ -135,7 +135,8 @@ entry:
ret void
}
-attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
-attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
+attributes #0 = { nounwind "target-features"="+wavefrontsize32" "amdgpu-waves-per-eu"="2,4" }
+attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="1,1" }
attributes #2 = { nounwind "target-features"="+wavefrontsize64" }
attributes #3 = { nounwind "target-features"="+wavefrontsize64" }
+attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="2,4" }
More information about the llvm-commits mailing list