[llvm-branch-commits] [llvm] 542a08d - [LAA] Avoid adding pointers to the checks if they are not needed.
Hans Wennborg via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Aug 3 06:58:32 PDT 2020
Author: Florian Hahn
Date: 2020-08-03T15:55:25+02:00
New Revision: 542a08dcb7a8044c0ba52146d866515603fad122
URL: https://github.com/llvm/llvm-project/commit/542a08dcb7a8044c0ba52146d866515603fad122
DIFF: https://github.com/llvm/llvm-project/commit/542a08dcb7a8044c0ba52146d866515603fad122.diff
LOG: [LAA] Avoid adding pointers to the checks if they are not needed.
Currently we skip alias sets with only reads or a single write and no
reads, but still add the pointers to the list of pointers in RtCheck.
This can lead to cases where we try to access a pointer that does not
exist when grouping checks. In most cases, the way we access
PositionMap masked that, as the value would default to index 0.
But in the example in PR46854 it causes a crash.
This patch updates the logic to avoid adding pointers for alias sets
that do not need any checks. It makes things slightly more verbose, by
first checking the numbers of reads/writes and bailing out early if we don't
need checks for the alias set.
I think this makes the logic a bit simpler to follow.
Reviewed By: anemet
Differential Revision: https://reviews.llvm.org/D84608
(cherry picked from commit 2062b3707c1ef698deaa9abc571b937fdd077168)
Added:
llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll
Modified:
llvm/lib/Analysis/LoopAccessAnalysis.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index ae282a7a1095..f409cd322146 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -393,7 +393,10 @@ void RuntimePointerChecking::groupChecks(
// equivalence class, the iteration order is deterministic.
for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
MI != ME; ++MI) {
- unsigned Pointer = PositionMap[MI->getPointer()];
+ auto PointerI = PositionMap.find(MI->getPointer());
+ assert(PointerI != PositionMap.end() &&
+ "pointer in equivalence class not found in PositionMap");
+ unsigned Pointer = PointerI->second;
bool Merged = false;
// Mark this pointer as seen.
Seen.insert(Pointer);
@@ -726,52 +729,55 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
SmallVector<MemAccessInfo, 4> Retries;
+ // First, count how many write and read accesses are in the alias set. Also
+ // collect MemAccessInfos for later.
+ SmallVector<MemAccessInfo, 4> AccessInfos;
for (auto A : AS) {
Value *Ptr = A.getValue();
bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
- MemAccessInfo Access(Ptr, IsWrite);
if (IsWrite)
++NumWritePtrChecks;
else
++NumReadPtrChecks;
+ AccessInfos.emplace_back(Ptr, IsWrite);
+ }
+ // We do not need runtime checks for this alias set, if there are no writes
+ // or a single write and no reads.
+ if (NumWritePtrChecks == 0 ||
+ (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
+ assert((AS.size() <= 1 ||
+ all_of(AS,
+ [this](auto AC) {
+ MemAccessInfo AccessWrite(AC.getValue(), true);
+ return DepCands.findValue(AccessWrite) == DepCands.end();
+ })) &&
+ "Can only skip updating CanDoRT below, if all entries in AS "
+ "are reads or there is at most 1 entry");
+ continue;
+ }
+
+ for (auto &Access : AccessInfos) {
if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
RunningDepId, ASId, ShouldCheckWrap, false)) {
- LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" << *Ptr << '\n');
+ LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
+ << *Access.getPointer() << '\n');
Retries.push_back(Access);
CanDoAliasSetRT = false;
}
}
- // If we have at least two writes or one write and a read then we need to
- // check them. But there is no need to checks if there is only one
- // dependence set for this alias set.
- //
// Note that this function computes CanDoRT and MayNeedRTCheck
// independently. For example CanDoRT=false, MayNeedRTCheck=false means that
// we have a pointer for which we couldn't find the bounds but we don't
// actually need to emit any checks so it does not matter.
- bool NeedsAliasSetRTCheck = false;
- if (!(IsDepCheckNeeded && CanDoAliasSetRT && RunningDepId == 2)) {
- NeedsAliasSetRTCheck = (NumWritePtrChecks >= 2 ||
- (NumReadPtrChecks >= 1 && NumWritePtrChecks >= 1));
- // For alias sets without at least 2 writes or 1 write and 1 read, there
- // is no need to generate RT checks and CanDoAliasSetRT for this alias set
- // does not impact whether runtime checks can be generated.
- if (!NeedsAliasSetRTCheck) {
- assert((AS.size() <= 1 ||
- all_of(AS,
- [this](auto AC) {
- MemAccessInfo AccessWrite(AC.getValue(), true);
- return DepCands.findValue(AccessWrite) ==
- DepCands.end();
- })) &&
- "Can only skip updating CanDoRT below, if all entries in AS "
- "are reads or there is at most 1 entry");
- continue;
- }
- }
+ //
+ // We need runtime checks for this alias set, if there are at least 2
+ // dependence sets (in which case RunningDepId > 2) or if we need to re-try
+ // any bound checks (because in that case the number of dependence sets is
+ // incomplete).
+ bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
// We need to perform run-time alias checks, but some pointers had bounds
// that couldn't be checked.
diff --git a/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll b/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll
new file mode 100644
index 000000000000..396899d8d280
--- /dev/null
+++ b/llvm/test/Transforms/LoopLoadElim/pr46854-adress-spaces.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; REQUIRES: amdgpu-registered-target
+
+; RUN: opt -globals-aa -loop-load-elim -S %s | FileCheck %s
+
+target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-ni:7"
+target triple = "amdgcn-amd-amdhsa"
+
+%struct.foo = type { %struct.pluto, i8, i8*, i32 }
+%struct.pluto = type { i32, i32, i32, %struct.wombat*, i32, i32, i32 }
+%struct.wombat = type { %struct.barney }
+%struct.barney = type { <2 x float> }
+
+@global = external protected local_unnamed_addr addrspace(4) externally_initialized global %struct.foo, align 8
+@global.1 = internal unnamed_addr addrspace(3) constant [4000 x float] undef, align 16
+
+; Function Attrs: nounwind
+define protected amdgpu_kernel void @widget(i32 %arg, i32 %arg1) #0 {
+; CHECK-LABEL: @widget(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: [[TMP:%.*]] = getelementptr inbounds [4000 x float], [4000 x float] addrspace(3)* @global.1, i32 0, i32 [[ARG:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = load %struct.wombat*, %struct.wombat* addrspace(4)* getelementptr inbounds (%struct.foo, [[STRUCT_FOO:%.*]] addrspace(4)* @global, i64 0, i32 0, i32 3), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds [[STRUCT_WOMBAT:%.*]], %struct.wombat* [[TMP2]], i64 undef, i32 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast %struct.barney* [[TMP3]] to i64*
+; CHECK-NEXT: br label [[BB5:%.*]]
+; CHECK: bb5.loopexit:
+; CHECK-NEXT: br label [[BB5]]
+; CHECK: bb5:
+; CHECK-NEXT: br label [[BB6:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: [[TMP7:%.*]] = phi i32 [ undef, [[BB5]] ], [ [[TMP19:%.*]], [[BB6]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = mul nsw i32 [[TMP7]], undef
+; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], undef
+; CHECK-NEXT: [[TMP10:%.*]] = sext i32 [[TMP9]] to i64
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_WOMBAT]], %struct.wombat* [[TMP2]], i64 [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast %struct.wombat* [[TMP11]] to i64*
+; CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
+; CHECK-NEXT: [[TMP14:%.*]] = srem i32 1, [[ARG1:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = add nuw nsw i32 [[TMP14]], 1
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds [4000 x float], [4000 x float] addrspace(3)* @global.1, i32 0, i32 [[TMP15]]
+; CHECK-NEXT: [[TMP17:%.*]] = load float, float addrspace(3)* [[TMP16]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = load float, float addrspace(3)* [[TMP]], align 4
+; CHECK-NEXT: store i64 [[TMP13]], i64* [[TMP4]], align 8
+; CHECK-NEXT: [[TMP19]] = add nsw i32 [[TMP7]], 1
+; CHECK-NEXT: [[TMP20:%.*]] = icmp slt i32 [[TMP7]], 3
+; CHECK-NEXT: br i1 [[TMP20]], label [[BB6]], label [[BB5_LOOPEXIT:%.*]]
+;
+bb:
+ %tmp = getelementptr inbounds [4000 x float], [4000 x float] addrspace(3)* @global.1, i32 0, i32 %arg
+ %tmp2 = load %struct.wombat*, %struct.wombat* addrspace(4)* getelementptr inbounds (%struct.foo, %struct.foo addrspace(4)* @global, i64 0, i32 0, i32 3), align 8
+ %tmp3 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp2, i64 undef, i32 0
+ %tmp4 = bitcast %struct.barney* %tmp3 to i64*
+ br label %bb5
+
+bb5: ; preds = %bb6, %bb
+ br label %bb6
+
+bb6: ; preds = %bb6, %bb5
+ %tmp7 = phi i32 [ undef, %bb5 ], [ %tmp19, %bb6 ]
+ %tmp8 = mul nsw i32 %tmp7, undef
+ %tmp9 = add i32 %tmp8, undef
+ %tmp10 = sext i32 %tmp9 to i64
+ %tmp11 = getelementptr inbounds %struct.wombat, %struct.wombat* %tmp2, i64 %tmp10
+ %tmp12 = bitcast %struct.wombat* %tmp11 to i64*
+ %tmp13 = load i64, i64* %tmp12, align 8
+ %tmp14 = srem i32 1, %arg1
+ %tmp15 = add nuw nsw i32 %tmp14, 1
+ %tmp16 = getelementptr inbounds [4000 x float], [4000 x float] addrspace(3)* @global.1, i32 0, i32 %tmp15
+ %tmp17 = load float, float addrspace(3)* %tmp16, align 4
+ %tmp18 = load float, float addrspace(3)* %tmp, align 4
+ store i64 %tmp13, i64* %tmp4, align 8
+ %tmp19 = add nsw i32 %tmp7, 1
+ %tmp20 = icmp slt i32 %tmp7, 3
+ br i1 %tmp20, label %bb6, label %bb5
+}
+
+attributes #0 = { nounwind }
More information about the llvm-branch-commits
mailing list