[llvm] 4c4b718 - [AMDGPU] Propagate amdgpu-waves-per-eu to callees

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 26 14:57:58 PDT 2020


Author: Stanislav Mekhanoshin
Date: 2020-03-26T14:43:44-07:00
New Revision: 4c4b71843b6fc77b67cd98a3bfbbba67392bfc2e

URL: https://github.com/llvm/llvm-project/commit/4c4b71843b6fc77b67cd98a3bfbbba67392bfc2e
DIFF: https://github.com/llvm/llvm-project/commit/4c4b71843b6fc77b67cd98a3bfbbba67392bfc2e.diff

LOG: [AMDGPU] Propagate amdgpu-waves-per-eu to callees

Differential Revision: https://reviews.llvm.org/D76868

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
    llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
index 0ad4eebcf3f9..982aae374884 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPropagateAttributes.cpp
@@ -48,19 +48,62 @@ extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
 
 namespace {
 
+// Target features to propagate.
+static constexpr const FeatureBitset TargetFeatures = {
+  AMDGPU::FeatureWavefrontSize16,
+  AMDGPU::FeatureWavefrontSize32,
+  AMDGPU::FeatureWavefrontSize64
+};
+
+// Attributes to propagate.
+static constexpr const char* AttributeNames[] = {
+  "amdgpu-waves-per-eu"
+};
+
+static constexpr unsigned NumAttr =
+  sizeof(AttributeNames) / sizeof(AttributeNames[0]);
+
 class AMDGPUPropagateAttributes {
-  const FeatureBitset TargetFeatures = {
-    AMDGPU::FeatureWavefrontSize16,
-    AMDGPU::FeatureWavefrontSize32,
-    AMDGPU::FeatureWavefrontSize64
+
+  class FnProperties {
+  private:
+    explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
+
+  public:
+    explicit FnProperties(const TargetMachine &TM, const Function &F) {
+      Features = TM.getSubtargetImpl(F)->getFeatureBits();
+
+      for (unsigned I = 0; I < NumAttr; ++I)
+        if (F.hasFnAttribute(AttributeNames[I]))
+          Attributes[I] = F.getFnAttribute(AttributeNames[I]);
+    }
+
+    bool operator == (const FnProperties &Other) const {
+      if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
+        return false;
+      for (unsigned I = 0; I < NumAttr; ++I)
+        if (Attributes[I] != Other.Attributes[I])
+          return false;
+      return true;
+    }
+
+    FnProperties adjustToCaller(const FnProperties &CallerProps) const {
+      FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
+      for (unsigned I = 0; I < NumAttr; ++I)
+        New.Attributes[I] = CallerProps.Attributes[I];
+      return New;
+    }
+
+    FeatureBitset Features;
+    Optional<Attribute> Attributes[NumAttr];
   };
 
-  class Clone{
+  class Clone {
   public:
-    Clone(FeatureBitset FeatureMask, Function *OrigF, Function *NewF) :
-      FeatureMask(FeatureMask), OrigF(OrigF), NewF(NewF) {}
+    Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
+      Properties(Props), OrigF(OrigF), NewF(NewF) {}
 
-    FeatureBitset FeatureMask;
+    FnProperties Properties;
     Function *OrigF;
     Function *NewF;
   };
@@ -77,17 +120,19 @@ class AMDGPUPropagateAttributes {
   SmallVector<Clone, 32> Clones;
 
   // Find a clone with required features.
-  Function *findFunction(const FeatureBitset &FeaturesNeeded,
+  Function *findFunction(const FnProperties &PropsNeeded,
                          Function *OrigF);
 
-  // Clone function F and set NewFeatures on the clone.
+  // Clone function \p F and set \p NewProps on the clone.
   // Cole takes the name of original function.
-  Function *cloneWithFeatures(Function &F,
-                              const FeatureBitset &NewFeatures);
+  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
 
   // Set new function's features in place.
   void setFeatures(Function &F, const FeatureBitset &NewFeatures);
 
+  // Set new function's attributes in place.
+  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
+
   std::string getFeatureString(const FeatureBitset &Features) const;
 
   // Propagate attributes from Roots.
@@ -155,11 +200,11 @@ INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
                 false, false)
 
 Function *
-AMDGPUPropagateAttributes::findFunction(const FeatureBitset &FeaturesNeeded,
+AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
                                         Function *OrigF) {
   // TODO: search for clone's clones.
   for (Clone &C : Clones)
-    if (C.OrigF == OrigF && FeaturesNeeded == C.FeatureMask)
+    if (C.OrigF == OrigF && PropsNeeded == C.Properties)
       return C.NewF;
 
   return nullptr;
@@ -195,8 +240,7 @@ bool AMDGPUPropagateAttributes::process() {
       if (F.isDeclaration())
         continue;
 
-      const FeatureBitset &CalleeBits =
-        TM->getSubtargetImpl(F)->getFeatureBits();
+      const FnProperties CalleeProps(*TM, F);
       SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
       SmallSet<CallBase *, 32> Visited;
 
@@ -213,32 +257,31 @@ bool AMDGPUPropagateAttributes::process() {
         if (!Roots.count(Caller) && !NewRoots.count(Caller))
           continue;
 
-        const FeatureBitset &CallerBits =
-          TM->getSubtargetImpl(*Caller)->getFeatureBits() & TargetFeatures;
+        const FnProperties CallerProps(*TM, *Caller);
 
-        if (CallerBits == (CalleeBits  & TargetFeatures)) {
+        if (CalleeProps == CallerProps) {
           if (!Roots.count(&F))
             NewRoots.insert(&F);
           continue;
         }
 
-        Function *NewF = findFunction(CallerBits, &F);
+        Function *NewF = findFunction(CallerProps, &F);
         if (!NewF) {
-          FeatureBitset NewFeatures((CalleeBits & ~TargetFeatures) |
-                                    CallerBits);
+          const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
           if (!AllowClone) {
             // This may set different features on different iteartions if
             // there is a contradiction in callers' attributes. In this case
             // we rely on a second pass running on Module, which is allowed
             // to clone.
-            setFeatures(F, NewFeatures);
+            setFeatures(F, NewProps.Features);
+            setAttributes(F, NewProps.Attributes);
             NewRoots.insert(&F);
             Changed = true;
             break;
           }
 
-          NewF = cloneWithFeatures(F, NewFeatures);
-          Clones.push_back(Clone(CallerBits, &F, NewF));
+          NewF = cloneWithProperties(F, NewProps);
+          Clones.push_back(Clone(CallerProps, &F, NewF));
           NewRoots.insert(NewF);
         }
 
@@ -267,13 +310,14 @@ bool AMDGPUPropagateAttributes::process() {
 }
 
 Function *
-AMDGPUPropagateAttributes::cloneWithFeatures(Function &F,
-                                             const FeatureBitset &NewFeatures) {
+AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
+                                               const FnProperties &NewProps) {
   LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');
 
   ValueToValueMapTy dummy;
   Function *NewF = CloneFunction(&F, dummy);
-  setFeatures(*NewF, NewFeatures);
+  setFeatures(*NewF, NewProps.Features);
+  setAttributes(*NewF, NewProps.Attributes);
   NewF->setVisibility(GlobalValue::DefaultVisibility);
   NewF->setLinkage(GlobalValue::InternalLinkage);
 
@@ -300,6 +344,18 @@ void AMDGPUPropagateAttributes::setFeatures(Function &F,
   F.addFnAttr("target-features", NewFeatureStr);
 }
 
+void AMDGPUPropagateAttributes::setAttributes(Function &F,
+    const ArrayRef<Optional<Attribute>> NewAttrs) {
+  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");
+  for (unsigned I = 0; I < NumAttr; ++I) {
+    F.removeFnAttr(AttributeNames[I]);
+    if (NewAttrs[I]) {
+      LLVM_DEBUG(dbgs() << '\t' << NewAttrs[I]->getAsString() << '\n');
+      F.addFnAttr(*NewAttrs[I]);
+    }
+  }
+}
+
 std::string
 AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
 {

diff --git a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
index 580fb31bc202..438445279b74 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-attributes-clone.ll
@@ -55,11 +55,11 @@
 ; OPT-INT: define internal fastcc void @foo3() unnamed_addr #4
 ; OPT-EXT: define internal fastcc void @foo2.3() unnamed_addr #4
 ; OPT-INT: define internal fastcc void @foo2() unnamed_addr #4
-; OPT: attributes #0 = { {{.*}} "target-features"="+wavefrontsize64" }
+; OPT: attributes #0 = { {{.*}} "amdgpu-waves-per-eu"="1,1" "target-features"="+wavefrontsize64" }
 ; OPT: attributes #1 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,-wavefrontsize32,+wavefrontsize64{{.*}}" }
-; OPT: attributes #2 = { {{.*}} "target-features"="+wavefrontsize32" }
+; OPT: attributes #2 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="+wavefrontsize32" }
 ; OPT: attributes #3 = { {{.*}} "target-features"="+wavefrontsize64" }
-; OPT: attributes #4 = { {{.*}} "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
+; OPT: attributes #4 = { {{.*}} "amdgpu-waves-per-eu"="2,4" "target-features"="{{.*}},-wavefrontsize16,+wavefrontsize32,-wavefrontsize64{{.*}}" }
 
 ; LLC: foo3:
 ; LLC: sample asm
@@ -94,7 +94,7 @@ entry:
   ret void
 }
 
-define void @foo3() #1 {
+define void @foo3() #4 {
 entry:
   call void asm sideeffect "; sample asm", ""()
   ret void
@@ -135,7 +135,8 @@ entry:
   ret void
 }
 
-attributes #0 = { nounwind "target-features"="+wavefrontsize32" }
-attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" }
+attributes #0 = { nounwind "target-features"="+wavefrontsize32" "amdgpu-waves-per-eu"="2,4" }
+attributes #1 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="1,1" }
 attributes #2 = { nounwind "target-features"="+wavefrontsize64" }
 attributes #3 = { nounwind "target-features"="+wavefrontsize64" }
+attributes #4 = { noinline nounwind "target-features"="+wavefrontsize64" "amdgpu-waves-per-eu"="2,4" }


        


More information about the llvm-commits mailing list