[llvm] 35393c8 - [funcattrs] Infer nosync from instruction walk

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 8 14:05:06 PDT 2021


Author: Philip Reames
Date: 2021-04-08T14:05:00-07:00
New Revision: 35393c865c2cb1afbf1315a9d48ac41994cd1344

URL: https://github.com/llvm/llvm-project/commit/35393c865c2cb1afbf1315a9d48ac41994cd1344
DIFF: https://github.com/llvm/llvm-project/commit/35393c865c2cb1afbf1315a9d48ac41994cd1344.diff

LOG: [funcattrs] Infer nosync from instruction walk

Pretty straightforward use of existing infrastructure and port of the attributor inference rules for nosync.

A couple points of interest:
* I deliberately switched from "monotonic or better" to "unordered or better". This is simply me being conservative and is better in line with the rest of the optimizer. We treat monotonic conservatively pretty much everywhere.
* The operand bundle test change is suspicious. It looks like we might have missed something here, but if so, it's an issue with the existing nofree inference as well. I'm going to take a closer look at that separately.
* I needed to keep the previous inference from readnone. This surprised me, but made sense once I realized readonly inference goes to lengths to reason about local vs non-local memory and that writes to local memory are okay. This is fine for the purpose of nosync, but would e.g. prevent us from inferring nofree from readnone - which is slightly surprising.

Differential Revision: https://reviews.llvm.org/D99769

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/FunctionAttrs.cpp
    llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
    llvm/test/CodeGen/AMDGPU/inline-attr.ll
    llvm/test/Transforms/FunctionAttrs/nofree.ll
    llvm/test/Transforms/FunctionAttrs/nosync.ll
    llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
    llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
    llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index f76dd584ec7ce..cfd302a536c6f 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1476,25 +1476,100 @@ static bool addWillReturn(const SCCNodeSet &SCCNodes) {
   return Changed;
 }
 
-// Infer the nosync attribute.  For the moment, the inference is trivial
-// and relies on the readnone attribute already being infered.  This will
-// be replaced with a more robust implementation in the near future.
+// Return true if this is an atomic which has an ordering stronger than
+// unordered.  Note that this is 
diff erent than the predicate we use in
+// Attributor.  Here we chose to be conservative and consider monotonic
+// operations potentially synchronizing.  We generally don't do much with
+// monotonic operations, so this is simply risk reduction.
+static bool isOrderedAtomic(Instruction *I) {
+  if (!I->isAtomic())
+    return false;
+
+  if (auto *FI = dyn_cast<FenceInst>(I))
+    // All legal orderings for fence are stronger than monotonic.
+    return FI->getSyncScopeID() != SyncScope::SingleThread;
+  else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
+    return true;
+  else if (auto *SI = dyn_cast<StoreInst>(I))
+    return !SI->isUnordered();
+  else if (auto *LI = dyn_cast<LoadInst>(I))
+    return !LI->isUnordered();
+  else {
+    llvm_unreachable("unknown atomic instruction?");
+  }
+}
+
+static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
+  // Volatile may synchronize
+  if (I.isVolatile())
+    return true;
+
+  // An ordered atomic may synchronize.  (See comment about on monotonic.)
+  if (isOrderedAtomic(&I))
+    return true;
+
+  auto *CB = dyn_cast<CallBase>(&I);
+  if (!CB)
+    // Non call site cases covered by the two checks above
+    return false;
+
+  if (CB->hasFnAttr(Attribute::NoSync))
+    return false;
+
+  // readnone + not convergent implies nosync
+  // (This is needed to initialize inference from declarations which aren't
+  //  explicitly nosync, but are readnone and not convergent.)
+  if (CB->hasFnAttr(Attribute::ReadNone) &&
+      !CB->hasFnAttr(Attribute::Convergent))
+    return false;
+
+  // Non volatile memset/memcpy/memmoves are nosync
+  // NOTE: Only intrinsics with volatile flags should be handled here.  All
+  // others should be marked in Intrinsics.td.
+  if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+    if (!MI->isVolatile())
+      return false;
+
+  // Speculatively assume in SCC.
+  if (Function *Callee = CB->getCalledFunction())
+    if (SCCNodes.contains(Callee))
+      return false;
+
+  return true;
+}
+
+// Infer the nosync attribute.
 static bool addNoSyncAttr(const SCCNodeSet &SCCNodes) {
-  bool Changed = false;
+  AttributeInferer AI;
+  AI.registerAttrInference(AttributeInferer::InferenceDescriptor{
+      Attribute::NoSync,
+      // Skip already marked functions.
+      [](const Function &F) { return F.hasNoSync(); },
+      // Instructions that break nosync assumption.
+      [&SCCNodes](Instruction &I) {
+        return InstrBreaksNoSync(I, SCCNodes);
+      },
+      [](Function &F) {
+        LLVM_DEBUG(dbgs()
+                   << "Adding nosync attr to fn " << F.getName() << "\n");
+        F.setNoSync();
+        ++NumNoSync;
+      },
+      /* RequiresExactDefinition= */ true});
+  bool Changed = AI.run(SCCNodes);
 
+  // readnone + not convergent implies nosync
+  // (This is here so that we don't have to duplicate the function local
+  //  memory reasoning of the readnone analysis.)
   for (Function *F : SCCNodes) {
     if (!F || F->hasNoSync())
       continue;
-
-    // readnone + not convergent implies nosync
     if (!F->doesNotAccessMemory() || F->isConvergent())
       continue;
-
     F->setNoSync();
     NumNoSync++;
     Changed = true;
   }
-
   return Changed;
 }
 

diff  --git a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
index d0f142b1cef52..61a46337898fe 100644
--- a/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
+++ b/llvm/test/Analysis/TypeBasedAliasAnalysis/functionattrs.ll
@@ -73,12 +73,12 @@ declare void @callee(i32* %p) nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i1) nounwind
 
 ; CHECK: attributes #0 = { norecurse nosync nounwind readnone willreturn }
-; CHECK: attributes #1 = { nofree norecurse nounwind willreturn writeonly }
+; CHECK: attributes #1 = { nofree norecurse nosync nounwind willreturn writeonly }
 ; CHECK: attributes #2 = { nounwind readonly }
 ; CHECK: attributes #3 = { nounwind }
 ; CHECK: attributes #4 = { nosync nounwind readnone willreturn }
-; CHECK: attributes #5 = { nofree nounwind willreturn }
-; CHECK: attributes #6 = { nofree norecurse nounwind willreturn }
+; CHECK: attributes #5 = { nofree nosync nounwind willreturn }
+; CHECK: attributes #6 = { nofree norecurse nosync nounwind willreturn }
 ; CHECK: attributes #7 = { argmemonly nofree nosync nounwind willreturn }
 
 ; Root note.

diff  --git a/llvm/test/CodeGen/AMDGPU/inline-attr.ll b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
index 540cc2fdc9414..acf04a95db3c8 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-attr.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-attr.ll
@@ -7,13 +7,13 @@
 ; GCN: %mul.i = fmul float %load, 1.500000e+01
 
 ; UNSAFE: attributes #0 = { norecurse nosync nounwind readnone willreturn "unsafe-fp-math"="true" }
-; UNSAFE: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
+; UNSAFE: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="true" }
 
 ; NOINFS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-infs-fp-math"="true" }
-; NOINFS: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
+; NOINFS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="true" "no-nans-fp-math"="false" "unsafe-fp-math"="false" }
 
 ; NONANS: attributes #0 = { norecurse nosync nounwind readnone willreturn "no-nans-fp-math"="true" }
-; NONANS: attributes #1 = { nofree norecurse nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
+; NONANS: attributes #1 = { nofree norecurse nosync nounwind willreturn "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="true" "unsafe-fp-math"="false" }
 
 define float @foo(float %x) #0 {
 entry:

diff  --git a/llvm/test/Transforms/FunctionAttrs/nofree.ll b/llvm/test/Transforms/FunctionAttrs/nofree.ll
index 028c11502b7e6..a51f8468a56e7 100644
--- a/llvm/test/Transforms/FunctionAttrs/nofree.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nofree.ll
@@ -36,7 +36,7 @@ entry:
 declare void @free(i8* nocapture) local_unnamed_addr #2
 
 define i32 @_Z4foo3Pi(i32* nocapture readonly %a) local_unnamed_addr #3 {
-; CHECK: Function Attrs: norecurse nounwind readonly uwtable willreturn
+; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn
 ; CHECK-LABEL: @_Z4foo3Pi(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4

diff  --git a/llvm/test/Transforms/FunctionAttrs/nosync.ll b/llvm/test/Transforms/FunctionAttrs/nosync.ll
index fc532b73e62a1..aed1f3669afc8 100644
--- a/llvm/test/Transforms/FunctionAttrs/nosync.ll
+++ b/llvm/test/Transforms/FunctionAttrs/nosync.ll
@@ -93,7 +93,7 @@ define void @test8(i8* %p) {
 
 ; singlethread fences are okay
 define void @test9(i8* %p) {
-; CHECK: Function Attrs: nofree norecurse nounwind willreturn
+; CHECK: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; CHECK-LABEL: @test9(
 ; CHECK-NEXT:    fence syncscope("singlethread") seq_cst
 ; CHECK-NEXT:    ret void
@@ -137,7 +137,7 @@ define i32 @load_acquire(i32* nocapture readonly %0) norecurse nounwind uwtable
 }
 
 define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtable {
-; CHECK: Function Attrs: norecurse nounwind readonly uwtable willreturn
+; CHECK: Function Attrs: norecurse nosync nounwind readonly uwtable willreturn
 ; CHECK-LABEL: @load_unordered(
 ; CHECK-NEXT:    [[TMP2:%.*]] = load atomic i32, i32* [[TMP0:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret i32 [[TMP2]]
@@ -148,7 +148,7 @@ define i32 @load_unordered(i32* nocapture readonly %0) norecurse nounwind uwtabl
 
 ; atomic store with unordered ordering.
 define void @store_unordered(i32* nocapture %0) norecurse nounwind uwtable {
-; CHECK: Function Attrs: nofree norecurse nounwind uwtable willreturn writeonly
+; CHECK: Function Attrs: nofree norecurse nosync nounwind uwtable willreturn writeonly
 ; CHECK-LABEL: @store_unordered(
 ; CHECK-NEXT:    store atomic i32 10, i32* [[TMP0:%.*]] unordered, align 4
 ; CHECK-NEXT:    ret void
@@ -209,9 +209,9 @@ define i32 @volatile_load(i32* %0) norecurse nounwind uwtable {
 declare void @nosync_function() noinline nounwind uwtable nosync
 
 define void @call_nosync_function() nounwind uwtable noinline {
-; CHECK: Function Attrs: noinline nounwind uwtable
+; CHECK: Function Attrs: noinline nosync nounwind uwtable
 ; CHECK-LABEL: @call_nosync_function(
-; CHECK-NEXT:    tail call void @nosync_function() #[[ATTR7:[0-9]+]]
+; CHECK-NEXT:    tail call void @nosync_function() #[[ATTR8:[0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @nosync_function() noinline nounwind uwtable
@@ -225,7 +225,7 @@ declare void @might_sync() noinline nounwind uwtable
 define void @call_might_sync() nounwind uwtable noinline {
 ; CHECK: Function Attrs: noinline nounwind uwtable
 ; CHECK-LABEL: @call_might_sync(
-; CHECK-NEXT:    tail call void @might_sync() #[[ATTR7]]
+; CHECK-NEXT:    tail call void @might_sync() #[[ATTR8]]
 ; CHECK-NEXT:    ret void
 ;
   tail call void @might_sync() noinline nounwind uwtable
@@ -248,7 +248,7 @@ define i32 @memcpy_volatile(i8* %ptr1, i8* %ptr2) {
 
 ; positive, non-volatile intrinsic.
 define i32 @memset_non_volatile(i8* %ptr1, i8 %val) {
-; CHECK: Function Attrs: nofree nounwind willreturn writeonly
+; CHECK: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; CHECK-LABEL: @memset_non_volatile(
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i32(i8* [[PTR1:%.*]], i8 [[VAL:%.*]], i32 8, i1 false)
 ; CHECK-NEXT:    ret i32 4

diff  --git a/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll b/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
index 78cfd03d5ac5c..92384a60dbda0 100644
--- a/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
+++ b/llvm/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
@@ -14,4 +14,4 @@ define void @g() {
 }
 
 
-; CHECK: attributes #0 = { nofree nounwind }
+; CHECK: attributes #0 = { nofree nosync nounwind }

diff  --git a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
index 968a290b988c5..578fc417ab027 100644
--- a/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
+++ b/llvm/test/Transforms/FunctionAttrs/read-write-scc.ll
@@ -17,4 +17,4 @@ define void @bar() {
   ret void
 }
 
-; CHECK: attributes #0 = { nofree nounwind }
+; CHECK: attributes #0 = { nofree nosync nounwind }

diff  --git a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
index 1afd97e44e9d7..6b50b4870c5d4 100644
--- a/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
+++ b/llvm/test/Transforms/InferFunctionAttrs/norecurse_debug.ll
@@ -52,5 +52,5 @@ attributes #1 = { nounwind readnone speculatable }
 !28 = !DILocation(line: 9, column: 18, scope: !2)
 !29 = !DILocation(line: 10, column: 1, scope: !2)
 
-; CHECK: attributes #0 = { nofree norecurse nounwind willreturn }
+; CHECK: attributes #0 = { nofree norecurse nosync nounwind willreturn }
 ; CHECK-NOT: foo.coefficient1


        


More information about the llvm-commits mailing list