[llvm] 3f078b3 - [AAPointerInfo] OffsetInfo: Unassigned is distinct from Unknown
Sameer Sahasrabuddhe via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 28 08:04:07 PDT 2022
Author: Sameer Sahasrabuddhe
Date: 2022-09-28T20:31:36+05:30
New Revision: 3f078b308bc67d3a05dc0de2588790ed9669febc
URL: https://github.com/llvm/llvm-project/commit/3f078b308bc67d3a05dc0de2588790ed9669febc
DIFF: https://github.com/llvm/llvm-project/commit/3f078b308bc67d3a05dc0de2588790ed9669febc.diff
LOG: [AAPointerInfo] OffsetInfo: Unassigned is distinct from Unknown
A User like the PHINode may be visited multiple times for the same pointer along
different def-use edges. The uninitialized state of OffsetInfo at the first
visit needs to be distinct from the Unknown value that may be assigned after
processing the PHINode. Without that, a PHINode with all inputs Unknown is never
followed to its uses. This results in incorrect optimization because some
interfering accessess are missed.
Differential Revision: https://reviews.llvm.org/D134704
Added:
llvm/test/CodeGen/AMDGPU/attributor.ll
Modified:
llvm/include/llvm/Transforms/IPO/Attributor.h
llvm/lib/Transforms/IPO/AttributorAttributes.cpp
llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 81459eda4cadb..c9aa7419dec40 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5067,8 +5067,9 @@ struct AAPointerInfo : public AbstractAttribute {
OAS.getOffset() < getOffset() + getSize();
}
- /// Constant used to represent unknown offset or sizes.
- static constexpr int64_t Unknown = 1 << 31;
+ /// Constants used to represent special offsets or sizes.
+ static constexpr int64_t Unassigned = -1;
+ static constexpr int64_t Unknown = -2;
};
/// Call \p CB on all accesses that might interfere with \p OAS and return
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 793467d28d204..a0dcd36ac45d5 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1226,7 +1226,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
/// Helper struct, will support ranges eventually.
struct OffsetInfo {
- int64_t Offset = OffsetAndSize::Unknown;
+ int64_t Offset = OffsetAndSize::Unassigned;
bool operator==(const OffsetInfo &OI) const { return Offset == OI.Offset; }
};
@@ -1243,6 +1243,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo PtrOI,
bool &Follow) {
+ assert(PtrOI.Offset != OffsetAndSize::Unassigned &&
+ "Cannot pass through if the input Ptr was not visited!");
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
UsrOI = PtrOI;
Follow = true;
@@ -1283,11 +1285,15 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
APInt GEPOffset(DL.getIndexTypeSizeInBits(GEP->getType()), 0);
if (PtrOI.Offset == OffsetAndSize::Unknown ||
!GEP->accumulateConstantOffset(DL, GEPOffset)) {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset not constant "
+ << *GEP << "\n");
UsrOI.Offset = OffsetAndSize::Unknown;
Follow = true;
return true;
}
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] GEP offset is constant " << *GEP
+ << "\n");
UsrOI.Offset = PtrOI.Offset + GEPOffset.getZExtValue();
Follow = true;
return true;
@@ -1306,15 +1312,22 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
bool IsFirstPHIUser = !OffsetInfoMap.count(Usr);
OffsetInfo &UsrOI = OffsetInfoMap[Usr];
OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
- // Check if the PHI is invariant (so far).
- if (UsrOI == PtrOI)
- return true;
// Check if the PHI operand has already an unknown offset as we can't
// improve on that anymore.
if (PtrOI.Offset == OffsetAndSize::Unknown) {
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
+ << *CurPtr << " in " << *Usr << "\n");
+ Follow = UsrOI.Offset != OffsetAndSize::Unknown;
UsrOI = PtrOI;
- Follow = true;
+ return true;
+ }
+
+ // Check if the PHI is invariant (so far).
+ if (UsrOI == PtrOI) {
+ assert(PtrOI.Offset != OffsetAndSize::Unassigned &&
+ "Cannot assign if the current Ptr was not visited!");
+ LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
return true;
}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor.ll b/llvm/test/CodeGen/AMDGPU/attributor.ll
new file mode 100644
index 0000000000000..b7a219958823a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attributor.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s | FileCheck %s
+
+target triple = "amdgcn-amd-amdhsa"
+
+; The call to intrinsic implicitarg_ptr reaches a load through a phi. The
+; offsets of the phi cannot be determined, and hence the attirbutor assumes that
+; hostcall is in use.
+
+; CHECK: .value_kind: hidden_hostcall_buffer
+; CHECK: .value_kind: hidden_multigrid_sync_arg
+
+define amdgpu_kernel void @the_kernel(i32 addrspace(1)* %a, i64 %index1, i64 %index2, i1 %cond) {
+entry:
+ %tmp7 = tail call i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ br i1 %cond, label %old, label %new
+
+old: ; preds = %entry
+ %tmp4 = getelementptr i8, i8 addrspace(4)* %tmp7, i64 %index1
+ br label %join
+
+new: ; preds = %entry
+ %tmp12 = getelementptr inbounds i8, i8 addrspace(4)* %tmp7, i64 %index2
+ br label %join
+
+join: ; preds = %new, %old
+ %.in.in.in = phi i8 addrspace(4)* [ %tmp12, %new ], [ %tmp4, %old ]
+ %.in.in = bitcast i8 addrspace(4)* %.in.in.in to i16 addrspace(4)*
+
+ ;;; THIS USE is where the offset into implicitarg_ptr is unknown
+ %.in = load i16, i16 addrspace(4)* %.in.in, align 2
+
+ %idx.ext = sext i16 %.in to i64
+ %add.ptr3 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+ %tmp16 = atomicrmw add i32 addrspace(1)* %add.ptr3, i32 15 syncscope("agent-one-as") monotonic, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+
+declare align 4 i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+
+declare i32 @llvm.amdgcn.workgroup.id.x()
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index a94a5e6468373..5105fdda8d01a 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -1616,6 +1616,71 @@ entry:
ret i32 %add1
}
+define i8 @local_alloca_not_simplifiable_2(i64 %index1, i64 %index2, i1 %cnd) {
+; TUNIT: Function Attrs: nofree norecurse nosync nounwind willreturn
+; TUNIT-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2
+; TUNIT-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 [[CND:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT: entry:
+; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; TUNIT-NEXT: [[GEP0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
+; TUNIT-NEXT: store i8 7, i8* [[GEP0]], align 16
+; TUNIT-NEXT: br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
+; TUNIT: left:
+; TUNIT-NEXT: [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX1]]
+; TUNIT-NEXT: br label [[JOIN:%.*]]
+; TUNIT: right:
+; TUNIT-NEXT: [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX2]]
+; TUNIT-NEXT: br label [[JOIN]]
+; TUNIT: join:
+; TUNIT-NEXT: [[GEP_JOIN:%.*]] = phi i8* [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ]
+; TUNIT-NEXT: store i8 9, i8* [[GEP_JOIN]], align 4
+; TUNIT-NEXT: [[I:%.*]] = load i8, i8* [[GEP0]], align 16
+; TUNIT-NEXT: ret i8 [[I]]
+;
+; CGSCC: Function Attrs: nofree norecurse nosync nounwind willreturn
+; CGSCC-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2
+; CGSCC-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 [[CND:%.*]]) #[[ATTR5]] {
+; CGSCC-NEXT: entry:
+; CGSCC-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
+; CGSCC-NEXT: [[GEP0:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 0
+; CGSCC-NEXT: store i8 7, i8* [[GEP0]], align 16
+; CGSCC-NEXT: br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
+; CGSCC: left:
+; CGSCC-NEXT: [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX1]]
+; CGSCC-NEXT: br label [[JOIN:%.*]]
+; CGSCC: right:
+; CGSCC-NEXT: [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDEX2]]
+; CGSCC-NEXT: br label [[JOIN]]
+; CGSCC: join:
+; CGSCC-NEXT: [[GEP_JOIN:%.*]] = phi i8* [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ]
+; CGSCC-NEXT: store i8 9, i8* [[GEP_JOIN]], align 4
+; CGSCC-NEXT: [[I:%.*]] = load i8, i8* [[GEP0]], align 16
+; CGSCC-NEXT: ret i8 [[I]]
+;
+entry:
+ %Bytes = alloca [1024 x i8], align 16
+ %gep0 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 0
+ store i8 7, i8* %gep0, align 4
+ br i1 %cnd, label %left, label %right
+
+left: ; preds = %entry
+ %gep1 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %index1
+ br label %join
+
+right: ; preds = %entry
+ %gep2 = getelementptr inbounds [1024 x i8], [1024 x i8]* %Bytes, i64 0, i64 %index2
+ br label %join
+
+join: ; preds = %right, %left
+ %gep.join = phi i8* [ %gep1, %left ], [ %gep2, %right ]
+ store i8 9, i8* %gep.join, align 4
+
+ ; This load cannot be replaced by the value 7 from %entry, since the previous
+ ; store interferes due to its unknown offset.
+ %i = load i8, i8* %gep0, align 4
+ ret i8 %i
+}
+
; We could simplify these if we separate accessed bins wrt. alignment (here mod 4).
define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) {
; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
More information about the llvm-commits
mailing list