[llvm-branch-commits] [llvm] 659c7bc - [LoopRotate] Use llvm.experimental.noalias.scope.decl for duplicating noalias metadata as needed

Jeroen Dobbelaere via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sun Jan 24 05:00:17 PST 2021


Author: Jeroen Dobbelaere
Date: 2021-01-24T13:53:13+01:00
New Revision: 659c7bcde62e96c84f157b1d4ac4f320c56089a1

URL: https://github.com/llvm/llvm-project/commit/659c7bcde62e96c84f157b1d4ac4f320c56089a1
DIFF: https://github.com/llvm/llvm-project/commit/659c7bcde62e96c84f157b1d4ac4f320c56089a1.diff

LOG: [LoopRotate] Use llvm.experimental.noalias.scope.decl for duplicating noalias metadata as needed

Similar to D92887, LoopRotation also needs to duplicate the noalias scopes when rotating a `@llvm.experimental.noalias.scope.decl` across a block boundary.
This is based on the version from the Full Restrict patches (D68511).

The problem it fixes also showed up in Transforms/Coroutines/ex5.ll after D93040 (when enabling strict checking with -verify-noalias-scope-decl-dom).

Reviewed By: nikic

Differential Revision: https://reviews.llvm.org/D94306

Added: 
    llvm/test/Transforms/LoopRotate/noalias.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/Cloning.h
    llvm/lib/Transforms/Utils/CloneFunction.cpp
    llvm/lib/Transforms/Utils/LoopRotationUtils.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index e43c43cf76e7..16062fb2f5f5 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -302,6 +302,13 @@ void adaptNoAliasScopes(
 void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
                                 ArrayRef<BasicBlock *> NewBlocks,
                                 LLVMContext &Context, StringRef Ext);
+
+/// Clone the specified noalias decl scopes. Then adapt all instructions in the
+/// [IStart, IEnd] (IEnd included !) range to the cloned versions. 'Ext' will be
+/// added to the duplicate scope names.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+                                Instruction *IStart, Instruction *IEnd,
+                                LLVMContext &Context, StringRef Ext);
 } // end namespace llvm
 
 #endif // LLVM_TRANSFORMS_UTILS_CLONING_H

diff  --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 3ff1e59f8eb1..ac474fbac7b3 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -968,6 +968,28 @@ void llvm::cloneAndAdaptNoAliasScopes(
       adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context);
 }
 
+void llvm::cloneAndAdaptNoAliasScopes(
+    ArrayRef<MetadataAsValue *> NoAliasDeclScopes, Instruction *IStart,
+    Instruction *IEnd, LLVMContext &Context, StringRef Ext) {
+  if (NoAliasDeclScopes.empty())
+    return;
+
+  DenseMap<MDNode *, MDNode *> ClonedScopes;
+  DenseMap<MetadataAsValue *, MetadataAsValue *> ClonedMVScopes;
+  LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+                    << NoAliasDeclScopes.size() << " node(s)\n");
+
+  cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext,
+                     Context);
+  // Identify instructions using metadata that needs adaptation
+  assert(IStart->getParent() == IEnd->getParent() && "different basic block ?");
+  auto ItStart = IStart->getIterator();
+  auto ItEnd = IEnd->getIterator();
+  ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range
+  for (auto &I : llvm::make_range(ItStart, ItEnd))
+    adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context);
+}
+
 void llvm::identifyNoAliasScopesToClone(
     ArrayRef<BasicBlock *> BBs,
     SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes) {

diff  --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 850170960937..8192092822aa 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/LoopUtils.h"
 #include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -400,6 +401,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
         break;
     }
 
+    // Remember the local noalias scope declarations in the header. After the
+    // rotation, they must be duplicated and the scope must be cloned. This
+    // avoids unwanted interaction across iterations.
+    SmallVector<Instruction *, 6> NoAliasDeclInstructions;
+    for (Instruction &I : *OrigHeader)
+      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+        NoAliasDeclInstructions.push_back(Decl);
+
     while (I != E) {
       Instruction *Inst = &*I++;
 
@@ -460,6 +469,70 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
       }
     }
 
+    if (!NoAliasDeclInstructions.empty()) {
+      // There are noalias scope declarations:
+      // (general):
+      // Original:    OrigPre              { OrigHeader NewHeader ... Latch }
+      // after:      (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
+      //
+      // with D: llvm.experimental.noalias.scope.decl,
+      //      U: !noalias or !alias.scope depending on D
+      //       ... { D U1 U2 }   can transform into:
+      // (0) : ... { D U1 U2 }        // no relevant rotation for this part
+      // (1) : ... D' { U1 U2 D }     // D is part of OrigHeader
+      // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
+      //
+      // We now want to transform:
+      // (1) -> : ... D' { D U1 U2 D'' }
+      // (2) -> : ... D' U1' { D U2 D'' U1'' }
+      // D: original llvm.experimental.noalias.scope.decl
+      // D', U1': duplicate with replaced scopes
+      // D'', U1'': different duplicate with replaced scopes
+      // This ensures a safe fallback to 'may_alias' introduced by the rotate,
+      // as U1'' and U1' scopes will not be compatible wrt to the local restrict
+
+      // Clone the llvm.experimental.noalias.decl again for the NewHeader.
+      Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
+      for (Instruction *NAD : NoAliasDeclInstructions) {
+        LLVM_DEBUG(dbgs() << "  Cloning llvm.experimental.noalias.scope.decl:"
+                          << *NAD << "\n");
+        Instruction *NewNAD = NAD->clone();
+        NewNAD->insertBefore(NewHeaderInsertionPoint);
+      }
+
+      // Scopes must now be duplicated, once for OrigHeader and once for
+      // OrigPreHeader'.
+      {
+        auto &Context = NewHeader->getContext();
+
+        SmallVector<MetadataAsValue *, 8> NoAliasDeclScopes;
+        for (Instruction *NAD : NoAliasDeclInstructions)
+          NoAliasDeclScopes.push_back(cast<MetadataAsValue>(
+              NAD->getOperand(Intrinsic::NoAliasScopeDeclScopeArg)));
+
+        LLVM_DEBUG(dbgs() << "  Updating OrigHeader scopes\n");
+        cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
+                                   "h.rot");
+        LLVM_DEBUG(OrigHeader->dump());
+
+        // Keep the compile time impact low by only adapting the inserted block
+        // of instructions in the OrigPreHeader. This might result in slightly
+        // more aliasing between these instructions and those that were already
+        // present, but it will be much faster when the original PreHeader is
+        // large.
+        LLVM_DEBUG(dbgs() << "  Updating part of OrigPreheader scopes\n");
+        auto *FirstDecl =
+            cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
+        auto *LastInst = &OrigPreheader->back();
+        cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
+                                   Context, "pre.rot");
+        LLVM_DEBUG(OrigPreheader->dump());
+
+        LLVM_DEBUG(dbgs() << "  Updated NewHeader:\n");
+        LLVM_DEBUG(NewHeader->dump());
+      }
+    }
+
     // Along with all the other instructions, we just cloned OrigHeader's
     // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
     // successors by duplicating their incoming values for OrigHeader.

diff  --git a/llvm/test/Transforms/LoopRotate/noalias.ll b/llvm/test/Transforms/LoopRotate/noalias.ll
new file mode 100644
index 000000000000..9f2dbb85bf96
--- /dev/null
+++ b/llvm/test/Transforms/LoopRotate/noalias.ll
@@ -0,0 +1,185 @@
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
+; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' -enable-mssa-loop-dependency=true -verify-memoryssa  < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(i32*)
+
+define void @test_02(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_02(
+; CHECK: entry:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !2
+; CHECK: for.body:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK:   store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !7)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !7
+; CHECK: for.end:
+
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+  store i32 42, i32* %_pA, align 16, !alias.scope !2
+  %cmp = icmp slt i32 %i.0, 100
+  %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %arrayidx, align 16, !noalias !2
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+  call void @g(i32* %arrayidx.lcssa) nounwind
+  ret void
+}
+
+define void @test_03(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_03(
+; CHECK: entry:
+; CHECK: for.body:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !5
+; CHECK:   store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK: for.end:
+
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  %cmp = icmp slt i32 %i.0, 100
+  %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+  store i32 42, i32* %_pA, align 16, !alias.scope !2
+  store i32 0, i32* %arrayidx, align 16, !noalias !2
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+  call void @g(i32* %arrayidx.lcssa) nounwind
+  ret void
+}
+
+define void @test_04(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_04(
+; CHECK: entry:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !9)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !9
+; CHECK: for.body:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK:   store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK:   store i32 43, i32* %_pA, align 16, !alias.scope !5
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !11)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !11
+; CHECK: for.end:
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+  store i32 42, i32* %_pA, align 16, !alias.scope !2
+  %cmp = icmp slt i32 %i.0, 100
+  %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %arrayidx, align 16, !noalias !2
+  store i32 43, i32* %_pA, align 16, !alias.scope !2
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+  call void @g(i32* %arrayidx.lcssa) nounwind
+  ret void
+}
+
+define void @test_05(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_05(
+; CHECK: entry:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !13)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !13
+; CHECK: for.body:
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK:   store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK:   store i32 43, i32* %_pA, align 16, !alias.scope !5
+; CHECK:   tail call void @llvm.experimental.noalias.scope.decl(metadata !15)
+; CHECK:   store i32 42, i32* %_pA, align 16, !alias.scope !15
+; CHECK: for.end:
+; CHECK:   store i32 44, i32* %_pA, align 16, !alias.scope !5
+
+entry:
+  %array = alloca [20 x i32], align 16
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.body, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+  tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+  store i32 42, i32* %_pA, align 16, !alias.scope !2
+  %cmp = icmp slt i32 %i.0, 100
+  %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  store i32 0, i32* %arrayidx, align 16, !noalias !2
+  store i32 43, i32* %_pA, align 16, !alias.scope !2
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+  store i32 44, i32* %_pA, align 16, !alias.scope !2
+  call void @g(i32* %arrayidx.lcssa) nounwind
+  ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind
+declare void @llvm.experimental.noalias.scope.decl(metadata) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inaccessiblememonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3}
+!3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA"}
+!4 = distinct !{!4, !"test_loop_rotate_XX"}
+
+; CHECK: !0 = !{i32 1, !"wchar_size", i32 4}
+; CHECK: !1 = !{!"clang"}
+; CHECK: !2 = !{!3}
+; CHECK: !3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA:pre.rot"}
+; CHECK: !4 = distinct !{!4, !"test_loop_rotate_XX"}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !4, !"test_loop_rotate_XX: pA"}
+; CHECK: !7 = !{!8}
+; CHECK: !8 = distinct !{!8, !4, !"test_loop_rotate_XX: pA:h.rot"}
+; CHECK: !9 = !{!10}
+; CHECK: !10 = distinct !{!10, !4, !"test_loop_rotate_XX: pA:pre.rot"}
+; CHECK: !11 = !{!12}
+; CHECK: !12 = distinct !{!12, !4, !"test_loop_rotate_XX: pA:h.rot"}
+; CHECK: !13 = !{!14}
+; CHECK: !14 = distinct !{!14, !4, !"test_loop_rotate_XX: pA:pre.rot"}
+; CHECK: !15 = !{!16}
+; CHECK: !16 = distinct !{!16, !4, !"test_loop_rotate_XX: pA:h.rot"}


        


More information about the llvm-branch-commits mailing list