[llvm-branch-commits] [llvm] 659c7bc - [LoopRotate] Use llvm.experimental.noalias.scope.decl for duplicating noalias metadata as needed
Jeroen Dobbelaere via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sun Jan 24 05:00:17 PST 2021
Author: Jeroen Dobbelaere
Date: 2021-01-24T13:53:13+01:00
New Revision: 659c7bcde62e96c84f157b1d4ac4f320c56089a1
URL: https://github.com/llvm/llvm-project/commit/659c7bcde62e96c84f157b1d4ac4f320c56089a1
DIFF: https://github.com/llvm/llvm-project/commit/659c7bcde62e96c84f157b1d4ac4f320c56089a1.diff
LOG: [LoopRotate] Use llvm.experimental.noalias.scope.decl for duplicating noalias metadata as needed
Similar to D92887, LoopRotation also needs to duplicate the noalias scopes when rotating a `@llvm.experimental.noalias.scope.decl` across a block boundary.
This is based on the version from the Full Restrict patches (D68511).
The problem it fixes also showed up in Transforms/Coroutines/ex5.ll after D93040 (when enabling strict checking with -verify-noalias-scope-decl-dom).
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D94306
Added:
llvm/test/Transforms/LoopRotate/noalias.ll
Modified:
llvm/include/llvm/Transforms/Utils/Cloning.h
llvm/lib/Transforms/Utils/CloneFunction.cpp
llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h
index e43c43cf76e7..16062fb2f5f5 100644
--- a/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -302,6 +302,13 @@ void adaptNoAliasScopes(
void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
ArrayRef<BasicBlock *> NewBlocks,
LLVMContext &Context, StringRef Ext);
+
+/// Clone the specified noalias decl scopes. Then adapt all instructions in the
+/// [IStart, IEnd] (IEnd included !) range to the cloned versions. 'Ext' will be
+/// added to the duplicate scope names.
+void cloneAndAdaptNoAliasScopes(ArrayRef<MetadataAsValue *> NoAliasDeclScopes,
+ Instruction *IStart, Instruction *IEnd,
+ LLVMContext &Context, StringRef Ext);
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_CLONING_H
diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp
index 3ff1e59f8eb1..ac474fbac7b3 100644
--- a/llvm/lib/Transforms/Utils/CloneFunction.cpp
+++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp
@@ -968,6 +968,28 @@ void llvm::cloneAndAdaptNoAliasScopes(
adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context);
}
+void llvm::cloneAndAdaptNoAliasScopes(
+ ArrayRef<MetadataAsValue *> NoAliasDeclScopes, Instruction *IStart,
+ Instruction *IEnd, LLVMContext &Context, StringRef Ext) {
+ if (NoAliasDeclScopes.empty())
+ return;
+
+ DenseMap<MDNode *, MDNode *> ClonedScopes;
+ DenseMap<MetadataAsValue *, MetadataAsValue *> ClonedMVScopes;
+ LLVM_DEBUG(dbgs() << "cloneAndAdaptNoAliasScopes: cloning "
+ << NoAliasDeclScopes.size() << " node(s)\n");
+
+ cloneNoAliasScopes(NoAliasDeclScopes, ClonedScopes, ClonedMVScopes, Ext,
+ Context);
+ // Identify instructions using metadata that needs adaptation
+ assert(IStart->getParent() == IEnd->getParent() && "different basic block ?");
+ auto ItStart = IStart->getIterator();
+ auto ItEnd = IEnd->getIterator();
+ ++ItEnd; // IEnd is included, increment ItEnd to get the end of the range
+ for (auto &I : llvm::make_range(ItStart, ItEnd))
+ adaptNoAliasScopes(&I, ClonedScopes, ClonedMVScopes, Context);
+}
+
void llvm::identifyNoAliasScopesToClone(
ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<MetadataAsValue *> &NoAliasDeclScopes) {
diff --git a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
index 850170960937..8192092822aa 100644
--- a/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -35,6 +35,7 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -400,6 +401,14 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
break;
}
+ // Remember the local noalias scope declarations in the header. After the
+ // rotation, they must be duplicated and the scope must be cloned. This
+ // avoids unwanted interaction across iterations.
+ SmallVector<Instruction *, 6> NoAliasDeclInstructions;
+ for (Instruction &I : *OrigHeader)
+ if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
+ NoAliasDeclInstructions.push_back(Decl);
+
while (I != E) {
Instruction *Inst = &*I++;
@@ -460,6 +469,70 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
}
}
+ if (!NoAliasDeclInstructions.empty()) {
+ // There are noalias scope declarations:
+ // (general):
+ // Original: OrigPre { OrigHeader NewHeader ... Latch }
+ // after: (OrigPre+OrigHeader') { NewHeader ... Latch OrigHeader }
+ //
+ // with D: llvm.experimental.noalias.scope.decl,
+ // U: !noalias or !alias.scope depending on D
+ // ... { D U1 U2 } can transform into:
+ // (0) : ... { D U1 U2 } // no relevant rotation for this part
+ // (1) : ... D' { U1 U2 D } // D is part of OrigHeader
+ // (2) : ... D' U1' { U2 D U1 } // D, U1 are part of OrigHeader
+ //
+ // We now want to transform:
+ // (1) -> : ... D' { D U1 U2 D'' }
+ // (2) -> : ... D' U1' { D U2 D'' U1'' }
+ // D: original llvm.experimental.noalias.scope.decl
+ // D', U1': duplicate with replaced scopes
+ // D'', U1'': different duplicate with replaced scopes
+ // This ensures a safe fallback to 'may_alias' introduced by the rotate,
+ // as U1'' and U1' scopes will not be compatible wrt to the local restrict
+
+ // Clone the llvm.experimental.noalias.decl again for the NewHeader.
+ Instruction *NewHeaderInsertionPoint = &(*NewHeader->getFirstNonPHI());
+ for (Instruction *NAD : NoAliasDeclInstructions) {
+ LLVM_DEBUG(dbgs() << " Cloning llvm.experimental.noalias.scope.decl:"
+ << *NAD << "\n");
+ Instruction *NewNAD = NAD->clone();
+ NewNAD->insertBefore(NewHeaderInsertionPoint);
+ }
+
+ // Scopes must now be duplicated, once for OrigHeader and once for
+ // OrigPreHeader'.
+ {
+ auto &Context = NewHeader->getContext();
+
+ SmallVector<MetadataAsValue *, 8> NoAliasDeclScopes;
+ for (Instruction *NAD : NoAliasDeclInstructions)
+ NoAliasDeclScopes.push_back(cast<MetadataAsValue>(
+ NAD->getOperand(Intrinsic::NoAliasScopeDeclScopeArg)));
+
+ LLVM_DEBUG(dbgs() << " Updating OrigHeader scopes\n");
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, {OrigHeader}, Context,
+ "h.rot");
+ LLVM_DEBUG(OrigHeader->dump());
+
+ // Keep the compile time impact low by only adapting the inserted block
+ // of instructions in the OrigPreHeader. This might result in slightly
+ // more aliasing between these instructions and those that were already
+ // present, but it will be much faster when the original PreHeader is
+ // large.
+ LLVM_DEBUG(dbgs() << " Updating part of OrigPreheader scopes\n");
+ auto *FirstDecl =
+ cast<Instruction>(ValueMap[*NoAliasDeclInstructions.begin()]);
+ auto *LastInst = &OrigPreheader->back();
+ cloneAndAdaptNoAliasScopes(NoAliasDeclScopes, FirstDecl, LastInst,
+ Context, "pre.rot");
+ LLVM_DEBUG(OrigPreheader->dump());
+
+ LLVM_DEBUG(dbgs() << " Updated NewHeader:\n");
+ LLVM_DEBUG(NewHeader->dump());
+ }
+ }
+
// Along with all the other instructions, we just cloned OrigHeader's
// terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
// successors by duplicating their incoming values for OrigHeader.
diff --git a/llvm/test/Transforms/LoopRotate/noalias.ll b/llvm/test/Transforms/LoopRotate/noalias.ll
new file mode 100644
index 000000000000..9f2dbb85bf96
--- /dev/null
+++ b/llvm/test/Transforms/LoopRotate/noalias.ll
@@ -0,0 +1,185 @@
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
+; RUN: opt -S -loop-rotate -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(loop-rotate)' -enable-mssa-loop-dependency=true -verify-memoryssa < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @g(i32*)
+
+define void @test_02(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_02(
+; CHECK: entry:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !2
+; CHECK: for.body:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !7)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !7
+; CHECK: for.end:
+
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+ store i32 42, i32* %_pA, align 16, !alias.scope !2
+ %cmp = icmp slt i32 %i.0, 100
+ %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* %arrayidx, align 16, !noalias !2
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+ call void @g(i32* %arrayidx.lcssa) nounwind
+ ret void
+}
+
+define void @test_03(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_03(
+; CHECK: entry:
+; CHECK: for.body:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !5
+; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK: for.end:
+
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, 100
+ %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+ store i32 42, i32* %_pA, align 16, !alias.scope !2
+ store i32 0, i32* %arrayidx, align 16, !noalias !2
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+ call void @g(i32* %arrayidx.lcssa) nounwind
+ ret void
+}
+
+define void @test_04(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_04(
+; CHECK: entry:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !9)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !9
+; CHECK: for.body:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK: store i32 43, i32* %_pA, align 16, !alias.scope !5
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !11)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !11
+; CHECK: for.end:
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+ store i32 42, i32* %_pA, align 16, !alias.scope !2
+ %cmp = icmp slt i32 %i.0, 100
+ %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* %arrayidx, align 16, !noalias !2
+ store i32 43, i32* %_pA, align 16, !alias.scope !2
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+ call void @g(i32* %arrayidx.lcssa) nounwind
+ ret void
+}
+
+define void @test_05(i32* nocapture %_pA) nounwind ssp {
+; CHECK-LABEL: @test_05(
+; CHECK: entry:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !13)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !13
+; CHECK: for.body:
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !5)
+; CHECK: store i32 0, i32* %arrayidx, align 16, !noalias !5
+; CHECK: store i32 43, i32* %_pA, align 16, !alias.scope !5
+; CHECK: tail call void @llvm.experimental.noalias.scope.decl(metadata !15)
+; CHECK: store i32 42, i32* %_pA, align 16, !alias.scope !15
+; CHECK: for.end:
+; CHECK: store i32 44, i32* %_pA, align 16, !alias.scope !5
+
+entry:
+ %array = alloca [20 x i32], align 16
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void @llvm.experimental.noalias.scope.decl(metadata !2)
+ store i32 42, i32* %_pA, align 16, !alias.scope !2
+ %cmp = icmp slt i32 %i.0, 100
+ %arrayidx = getelementptr inbounds [20 x i32], [20 x i32]* %array, i64 0, i64 0
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ store i32 0, i32* %arrayidx, align 16, !noalias !2
+ store i32 43, i32* %_pA, align 16, !alias.scope !2
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ %arrayidx.lcssa = phi i32* [ %arrayidx, %for.cond ]
+ store i32 44, i32* %_pA, align 16, !alias.scope !2
+ call void @g(i32* %arrayidx.lcssa) nounwind
+ ret void
+}
+
+; Function Attrs: inaccessiblememonly nounwind
+declare void @llvm.experimental.noalias.scope.decl(metadata) #1
+
+attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { inaccessiblememonly nounwind }
+attributes #2 = { nounwind readnone speculatable }
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang"}
+!2 = !{!3}
+!3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA"}
+!4 = distinct !{!4, !"test_loop_rotate_XX"}
+
+; CHECK: !0 = !{i32 1, !"wchar_size", i32 4}
+; CHECK: !1 = !{!"clang"}
+; CHECK: !2 = !{!3}
+; CHECK: !3 = distinct !{!3, !4, !"test_loop_rotate_XX: pA:pre.rot"}
+; CHECK: !4 = distinct !{!4, !"test_loop_rotate_XX"}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !4, !"test_loop_rotate_XX: pA"}
+; CHECK: !7 = !{!8}
+; CHECK: !8 = distinct !{!8, !4, !"test_loop_rotate_XX: pA:h.rot"}
+; CHECK: !9 = !{!10}
+; CHECK: !10 = distinct !{!10, !4, !"test_loop_rotate_XX: pA:pre.rot"}
+; CHECK: !11 = !{!12}
+; CHECK: !12 = distinct !{!12, !4, !"test_loop_rotate_XX: pA:h.rot"}
+; CHECK: !13 = !{!14}
+; CHECK: !14 = distinct !{!14, !4, !"test_loop_rotate_XX: pA:pre.rot"}
+; CHECK: !15 = !{!16}
+; CHECK: !16 = distinct !{!16, !4, !"test_loop_rotate_XX: pA:h.rot"}
More information about the llvm-branch-commits
mailing list