[llvm] [InferAddressSpaces] Handle unconverted ptrmask (PR #140802)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Thu May 22 13:52:32 PDT 2025
https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/140802
>From 7e2f064c3323933a18f1f71e3bc998fcce90cd4a Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 20 May 2025 15:55:36 -0500
Subject: [PATCH 1/5] [InferAddressSpaces] Handle unconverted ptrmask
In case a ptrmask cannot be converted to the new address space due to an
unknown mask value, this needs to be detected and an addrspacecast
inserted so that future uses of the unconverted return value of ptrmask
remain valid. Otherwise, users of this value become invalid by receiving
a nullptr as an operand.
This LLVM defect was identified via the AMD Fuzzing project.
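As a minimal IR sketch of the failing pattern (it mirrors the test added
below; the function name is illustrative):

    define i8 @repro(ptr addrspace(3) %p, i64 %mask, i64 %idx) {
      %cast = addrspacecast ptr addrspace(3) %p to ptr
      ; %mask is unknown, so the ptrmask cannot be rewritten to addrspace(3).
      %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
      ; Rewriting %gep to addrspace(3) previously looked up a new value for
      ; %masked, got nullptr back, and installed a null operand.
      %gep = getelementptr i8, ptr %masked, i64 %idx
      %load = load i8, ptr %gep
      ret i8 %load
    }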
---
.../Transforms/Scalar/InferAddressSpaces.cpp | 15 ++++++++++++++-
.../InferAddressSpaces/AMDGPU/ptrmask.ll | 18 ++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index d3771c0903456..4f2e8bbd1102a 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1338,7 +1338,20 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
unsigned OperandNo = PoisonUse->getOperandNo();
assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
- NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(PoisonUse->get()));
+ WeakTrackingVH NewOp = ValueWithNewAddrSpace.lookup(PoisonUse->get());
+ if (NewOp) {
+ NewV->setOperand(OperandNo, NewOp);
+ } else {
+ // Something went wrong while converting the instruction defining the new
+ // operand value. -> Replace the poison value with the previous operand
+ // value combined with an addrspace case.
+ Value *PoisonOp = NewV->getOperand(OperandNo);
+ Value *OldOp = V->getOperand(OperandNo);
+ Value *AddrSpaceCast =
+ new AddrSpaceCastInst(OldOp, PoisonOp->getType(), "",
+ cast<Instruction>(NewV)->getIterator());
+ NewV->setOperand(OperandNo, AddrSpaceCast);
+ }
}
SmallVector<Instruction *, 16> DeadInstructions;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index 6ef926f935830..1c1d1df79520d 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -343,6 +343,24 @@ define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr,
ret i8 %load
}
+; Non-const masks with no known range should not prevent other ptr-manipulating
+; instructions (such as gep) from being converted.
+define i8 @ptrmask_cast_local_to_flat_unknown_mask(ptr addrspace(3) %src.ptr, i64 %mask, i64 %idx) {
+; CHECK-LABEL: @ptrmask_cast_local_to_flat_unknown_mask(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i64 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
+ %gep = getelementptr i8, ptr %masked, i64 %idx
+ %load = load i8, ptr %gep
+ ret i8 %load
+}
+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64) #0
declare ptr addrspace(5) @llvm.ptrmask.p5.i32(ptr addrspace(5), i32) #0
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
>From 0d02438f6e135af394d67dedb18b12f19e38b397 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 21 May 2025 03:42:35 -0500
Subject: [PATCH 2/5] implement feedback
---
.../Transforms/Scalar/InferAddressSpaces.cpp | 2 +-
.../InferAddressSpaces/AMDGPU/ptrmask.ll | 18 ++++++++++++++++++
2 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 4f2e8bbd1102a..3b05dd2bddcb3 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -1344,7 +1344,7 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
} else {
// Something went wrong while converting the instruction defining the new
// operand value. -> Replace the poison value with the previous operand
- // value combined with an addrspace case.
+ // value combined with an addrspacecast.
Value *PoisonOp = NewV->getOperand(OperandNo);
Value *OldOp = V->getOperand(OperandNo);
Value *AddrSpaceCast =
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index 1c1d1df79520d..ccc7df89dcd8e 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -361,7 +361,25 @@ define i8 @ptrmask_cast_local_to_flat_unknown_mask(ptr addrspace(3) %src.ptr, i6
ret i8 %load
}
+define i8 @ptrmask_vector_cast_local_to_flat_unknown_mask(<2 x ptr addrspace(3)> %src.ptr, <2 x i64> %mask, i64 %ptridx, i64 %idx) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat_unknown_mask(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <2 x ptr addrspace(3)> [[SRC_PTR:%.*]] to <2 x ptr>
+; CHECK-NEXT: [[MASKED:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[CAST]], <2 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[PTR:%.*]] = extractelement <2 x ptr> [[MASKED]], i64 [[PTRIDX:%.*]]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[PTR]], i64 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast <2 x ptr addrspace(3)> %src.ptr to <2 x ptr>
+ %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %cast, <2 x i64> %mask)
+ %ptr = extractelement <2 x ptr> %masked, i64 %ptridx
+ %gep = getelementptr i8, ptr %ptr, i64 %idx
+ %load = load i8, ptr %gep
+ ret i8 %load
+}
+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64) #0
+declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>) #0
declare ptr addrspace(5) @llvm.ptrmask.p5.i32(ptr addrspace(5), i32) #0
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
>From 1648319378d05a48ab8b63fa73039cf9b7f3347a Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 21 May 2025 08:39:38 -0500
Subject: [PATCH 3/5] implement alternative approach
---
.../Transforms/Scalar/InferAddressSpaces.cpp | 34 +++++-------
.../InferAddressSpaces/AMDGPU/ptrmask.ll | 52 ++++++++++++++-----
2 files changed, 53 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 3b05dd2bddcb3..f16c97f206f63 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -644,9 +644,6 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
-//
-// This may also return nullptr in the case the instruction could not be
-// rewritten.
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
@@ -669,17 +666,24 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Technically the intrinsic ID is a pointer typed argument, so specially
// handle calls early.
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
+ const Use &PtrArgUse = II->getArgOperandUse(0);
Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
- II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
- PredicatedAS, PoisonUsesToFix);
+ PtrArgUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ PoisonUsesToFix);
Value *Rewrite =
TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
if (Rewrite) {
assert(Rewrite != II && "cannot modify this pointer operation in place");
return Rewrite;
}
-
- return nullptr;
+ // Leave the ptrmask as-is and insert an addrspacecast after it.
+ Instruction *AddrSpaceCast = new AddrSpaceCastInst(II, NewPtr->getType());
+ AddrSpaceCast->insertAfter(II->getIterator());
+ AddrSpaceCast->setDebugLoc(II->getDebugLoc());
+ // If we generated a poison operand for the ptr argument, remove it.
+ if (!PoisonUsesToFix->empty() && PoisonUsesToFix->back() == &PtrArgUse)
+ PoisonUsesToFix->pop_back();
+ return AddrSpaceCast;
}
unsigned AS = TTI->getAssumedAddrSpace(I);
@@ -1339,19 +1343,9 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
unsigned OperandNo = PoisonUse->getOperandNo();
assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
WeakTrackingVH NewOp = ValueWithNewAddrSpace.lookup(PoisonUse->get());
- if (NewOp) {
- NewV->setOperand(OperandNo, NewOp);
- } else {
- // Something went wrong while converting the instruction defining the new
- // operand value. -> Replace the poison value with the previous operand
- // value combined with an addrspacecast.
- Value *PoisonOp = NewV->getOperand(OperandNo);
- Value *OldOp = V->getOperand(OperandNo);
- Value *AddrSpaceCast =
- new AddrSpaceCastInst(OldOp, PoisonOp->getType(), "",
- cast<Instruction>(NewV)->getIterator());
- NewV->setOperand(OperandNo, AddrSpaceCast);
- }
+ assert(NewOp &&
+ "poison replacements in ValueWithNewAddrSpace shouldn't be null");
+ NewV->setOperand(OperandNo, NewOp);
}
SmallVector<Instruction *, 16> DeadInstructions;
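The effect of this alternative approach on the unknown-mask case, sketched
in IR (taken from the CHECK lines in the test diff below):

    %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
    ; New: the ptrmask stays flat; an addrspacecast feeds the rewritten users.
    %tmp = addrspacecast ptr %masked to ptr addrspace(3)
    %gep = getelementptr i8, ptr addrspace(3) %tmp, i64 %idx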
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index ccc7df89dcd8e..bc89d49b3cd2b 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -3,9 +3,10 @@
define i8 @ptrmask_cast_local_to_flat(ptr addrspace(3) %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -16,9 +17,10 @@ define i8 @ptrmask_cast_local_to_flat(ptr addrspace(3) %src.ptr, i64 %mask) {
define i8 @ptrmask_cast_private_to_flat(ptr addrspace(5) %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_private_to_flat(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(5)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(5) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(5) %src.ptr to ptr
@@ -29,9 +31,10 @@ define i8 @ptrmask_cast_private_to_flat(ptr addrspace(5) %src.ptr, i64 %mask) {
define i8 @ptrmask_cast_region_to_flat(ptr addrspace(2) %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_region_to_flat(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(2)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(2) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(2) %src.ptr to ptr
@@ -109,7 +112,8 @@ define i8 @ptrmask_cast_flat_to_global(ptr %ptr, i64 %mask) {
define i8 @ptrmask_cast_local_to_flat_global(i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_global(
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) @lds0 to ptr), i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%masked = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) @lds0 to ptr), i64 %mask)
@@ -150,7 +154,8 @@ define i8 @multi_ptrmask_cast_local_to_flat(ptr addrspace(3) %src.ptr, i64 %mask
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
; CHECK-NEXT: [[LOAD0:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -167,7 +172,8 @@ define i8 @multi_ptrmask_cast_region_to_flat(ptr addrspace(2) %src.ptr, i64 %mas
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[SRC_PTR:%.*]] to ptr
; CHECK-NEXT: [[LOAD0:%.*]] = load i8, ptr addrspace(2) [[SRC_PTR]], align 1
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(2)
+; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr addrspace(2) [[TMP1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -182,9 +188,10 @@ define i8 @multi_ptrmask_cast_region_to_flat(ptr addrspace(2) %src.ptr, i64 %mas
; Do not fold this since it clears a single high bit.
define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 -4294967297)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -196,9 +203,10 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(ptr addrspace(
; Do not fold this since it clears a single high bit.
define i8 @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 9223372036854775807)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -378,6 +386,24 @@ define i8 @ptrmask_vector_cast_local_to_flat_unknown_mask(<2 x ptr addrspace(3)>
ret i8 %load
}
+define i8 @interleaved_ptrmask_cast_local_to_flat_unknown_mask(ptr addrspace(3) %src.ptr, i64 %mask, i64 %idx) {
+; CHECK-LABEL: @interleaved_ptrmask_cast_local_to_flat_unknown_mask(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GEP1:%.*]] to ptr
+; CHECK-NEXT: store i64 [[MASK:%.*]], ptr addrspace(3) [[GEP1]], align 8
+; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i64 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
+ store i64 %mask, ptr %cast
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
+ %gep = getelementptr i8, ptr %masked, i64 %idx
+ %load = load i8, ptr %gep
+ ret i8 %load
+}
+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64) #0
declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>) #0
declare ptr addrspace(5) @llvm.ptrmask.p5.i32(ptr addrspace(5), i32) #0
>From 3659e060e97537444ed98c9730232941ceb4f581 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 22 May 2025 09:34:33 -0500
Subject: [PATCH 4/5] pull ptrmask handling completely into InferAddressSpaces
---
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 35 -----------
.../Transforms/Scalar/InferAddressSpaces.cpp | 59 ++++++++++++++-----
2 files changed, 43 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 05e1aa02d2703..49f935534dfba 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1066,41 +1066,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
return NewVal;
}
- case Intrinsic::ptrmask: {
- unsigned OldAS = OldV->getType()->getPointerAddressSpace();
- unsigned NewAS = NewV->getType()->getPointerAddressSpace();
- Value *MaskOp = II->getArgOperand(1);
- Type *MaskTy = MaskOp->getType();
-
- bool DoTruncate = false;
-
- const GCNTargetMachine &TM =
- static_cast<const GCNTargetMachine &>(getTLI()->getTargetMachine());
- if (!TM.isNoopAddrSpaceCast(OldAS, NewAS)) {
- // All valid 64-bit to 32-bit casts work by chopping off the high
- // bits. Any masking only clearing the low bits will also apply in the new
- // address space.
- if (DL.getPointerSizeInBits(OldAS) != 64 ||
- DL.getPointerSizeInBits(NewAS) != 32)
- return nullptr;
-
- // TODO: Do we need to thread more context in here?
- KnownBits Known = computeKnownBits(MaskOp, DL, 0, nullptr, II);
- if (Known.countMinLeadingOnes() < 32)
- return nullptr;
-
- DoTruncate = true;
- }
-
- IRBuilder<> B(II);
- if (DoTruncate) {
- MaskTy = B.getInt32Ty();
- MaskOp = B.CreateTrunc(MaskOp, MaskTy);
- }
-
- return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
- {NewV, MaskOp});
- }
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
Type *DestTy = II->getType();
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index f16c97f206f63..5324c93add92d 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -666,24 +666,51 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Technically the intrinsic ID is a pointer typed argument, so specially
// handle calls early.
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
- const Use &PtrArgUse = II->getArgOperandUse(0);
+ const Use &PtrOpUse = II->getArgOperandUse(0);
+ unsigned OldAddrSpace = PtrOpUse.get()->getType()->getPointerAddressSpace();
+ Value *MaskOp = II->getArgOperand(1);
+ Type *MaskTy = MaskOp->getType();
+
+ bool DoTruncate = false;
+ bool DoNotConvert = false;
+
+ if (!TTI->isNoopAddrSpaceCast(OldAddrSpace, NewAddrSpace)) {
+ // All valid 64-bit to 32-bit casts work by chopping off the high
+ // bits. Any masking only clearing the low bits will also apply in the new
+ // address space.
+ if (DL->getPointerSizeInBits(OldAddrSpace) != 64 ||
+ DL->getPointerSizeInBits(NewAddrSpace) != 32) {
+ DoNotConvert = true;
+ } else {
+ // TODO: Do we need to thread more context in here?
+ KnownBits Known = computeKnownBits(MaskOp, *DL, 0, nullptr, II);
+ if (Known.countMinLeadingOnes() < 32)
+ DoNotConvert = true;
+ else
+ DoTruncate = true;
+ }
+ }
+ if (DoNotConvert) {
+ // Leave the ptrmask as-is and insert an addrspacecast after it.
+ std::optional<BasicBlock::iterator> InsertPoint =
+ II->getInsertionPointAfterDef();
+ assert(InsertPoint && "insertion after ptrmask should be possible");
+ Instruction *AddrSpaceCast =
+ new AddrSpaceCastInst(II, NewPtrType, "", *InsertPoint);
+ AddrSpaceCast->setDebugLoc(II->getDebugLoc());
+ return AddrSpaceCast;
+ }
+
+ IRBuilder<> B(II);
+ if (DoTruncate) {
+ MaskTy = B.getInt32Ty();
+ MaskOp = B.CreateTrunc(MaskOp, MaskTy);
+ }
Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
- PtrArgUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ PtrOpUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
PoisonUsesToFix);
- Value *Rewrite =
- TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
- if (Rewrite) {
- assert(Rewrite != II && "cannot modify this pointer operation in place");
- return Rewrite;
- }
- // Leave the ptrmask as-is and insert an addrspacecast after it.
- Instruction *AddrSpaceCast = new AddrSpaceCastInst(II, NewPtr->getType());
- AddrSpaceCast->insertAfter(II->getIterator());
- AddrSpaceCast->setDebugLoc(II->getDebugLoc());
- // If we generated a poison operand for the ptr argument, remove it.
- if (!PoisonUsesToFix->empty() && PoisonUsesToFix->back() == &PtrArgUse)
- PoisonUsesToFix->pop_back();
- return AddrSpaceCast;
+ return B.CreateIntrinsic(Intrinsic::ptrmask, {NewPtr->getType(), MaskTy},
+ {NewPtr, MaskOp});
}
unsigned AS = TTI->getAssumedAddrSpace(I);
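For the convertible case that this patch moves out of the AMDGPU TTI hook,
the rewrite truncates the mask before rebuilding the intrinsic; sketched in
IR for a 64-bit flat to 32-bit local cast (value names are illustrative):

    ; %mask is known to have >= 32 leading ones, e.g. an alignment mask:
    %mask.trunc = trunc i64 %mask to i32
    %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) %src, i32 %mask.trunc)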
>From 91027786576363eec57707441b2512fe8e35eee7 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 22 May 2025 15:51:50 -0500
Subject: [PATCH 5/5] move to helper function; check for integral addrspace;
move addrspace check to TTI
---
.../llvm/Analysis/TargetTransformInfo.h | 5 +
.../llvm/Analysis/TargetTransformInfoImpl.h | 29 ++++++
llvm/lib/Analysis/TargetTransformInfo.cpp | 5 +
.../Transforms/Scalar/InferAddressSpaces.cpp | 97 ++++++++++---------
4 files changed, 91 insertions(+), 45 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1aed98e8f50db..762350ff3ea64 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -492,6 +492,11 @@ class TargetTransformInfo {
bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+ std::optional<uint64_t> getAddrSpaceCastMaskWidth(unsigned FromAS,
+ unsigned ToAS,
+ Value *MaskOP,
+ Instruction *I) const;
+
/// Return true if globals in this address space can have initializers other
/// than `undef`.
bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a80b4c5179bad..0f560b26498b5 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -16,6 +16,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -151,6 +152,34 @@ class TargetTransformInfoImplBase {
}
virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+
+ // Assuming that the cast between the two given addrspaces is not a noop,
+ // calculate the width of the given mask value so that it can be applied to
+ // the destination addrspace. In case it cannot be applied since the cast
+ // between the two addrspaces is invalid or the mask value is larger than the
+ // resulting addrspace bit-width, return an empty optional.
+ //
+ // Note that this currently expects the addrspaces to be integral. In case one
+ // of them isn't, an empty optional is returned.
+ virtual std::optional<uint64_t>
+ getAddrSpaceCastMaskWidth(unsigned FromAS, unsigned ToAS, Value *MaskOp,
+ Instruction *I) const {
+ if (DL.isNonIntegralAddressSpace(FromAS) ||
+ DL.isNonIntegralAddressSpace(ToAS))
+ return std::nullopt;
+ // All valid 64-bit to 32-bit casts work by chopping off the high
+ // bits. Any masking only clearing the low bits will also apply in the new
+ // address space.
+ if (DL.getPointerSizeInBits(FromAS) != 64 ||
+ DL.getPointerSizeInBits(ToAS) != 32)
+ return std::nullopt;
+ // TODO: Do we need to thread more context in here?
+ KnownBits Known = computeKnownBits(MaskOp, DL, 0, nullptr, I);
+ if (Known.countMinLeadingOnes() < 32)
+ return std::nullopt;
+ return 32;
+ }
+
virtual bool
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
return AS == 0;
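For a concrete instance of the default rule above, with a 64-bit FromAS and
a 32-bit ToAS (mask values are illustrative; the second one appears in the
existing ptrmask.ll tests):

    ; i64 -4096 = 0xFFFFFFFFFFFFF000 has 52 leading ones (>= 32), so the
    ; hook returns 32 and the mask can be truncated to i32 -4096.
    %m0 = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 -4096)
    ; i64 9223372036854775807 = 0x7FFFFFFFFFFFFFFF has 0 leading ones, so
    ; the hook returns std::nullopt and the ptrmask is left unconverted.
    %m1 = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 9223372036854775807)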
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 0f857399660fe..4e0a126919627 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -329,6 +329,11 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
+std::optional<uint64_t> TargetTransformInfo::getAddrSpaceCastMaskWidth(
+ unsigned FromAS, unsigned ToAS, Value *MaskOP, Instruction *I) const {
+ return TTIImpl->getAddrSpaceCastMaskWidth(FromAS, ToAS, MaskOP, I);
+}
+
bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
unsigned AS) const {
return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 5324c93add92d..764931ed486c1 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -206,6 +206,12 @@ class InferAddressSpacesImpl {
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
+ Value *clonePtrMaskWithNewAddressSpace(
+ IntrinsicInst *I, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
+
Value *cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
@@ -635,6 +641,50 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
return PoisonValue::get(NewPtrTy);
}
+// A helper function for cloneInstructionWithNewAddressSpace. Handles the
+// conversion of a ptrmask intrinsic instruction.
+Value *InferAddressSpacesImpl::clonePtrMaskWithNewAddressSpace(
+ IntrinsicInst *I, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
+ const Use &PtrOpUse = I->getArgOperandUse(0);
+ unsigned OldAddrSpace = PtrOpUse.get()->getType()->getPointerAddressSpace();
+ Value *MaskOp = I->getArgOperand(1);
+ Type *MaskTy = MaskOp->getType();
+
+ std::optional<uint64_t> TruncateToWidth;
+
+ if (!TTI->isNoopAddrSpaceCast(OldAddrSpace, NewAddrSpace)) {
+ // Get the mask width that is applicable to the new addrspace.
+ TruncateToWidth =
+ TTI->getAddrSpaceCastMaskWidth(OldAddrSpace, NewAddrSpace, MaskOp, I);
+ // If there is no such mask, leave the ptrmask as-is and insert an
+ // addrspacecast after it.
+ if (!TruncateToWidth) {
+ std::optional<BasicBlock::iterator> InsertPoint =
+ I->getInsertionPointAfterDef();
+ assert(InsertPoint && "insertion after ptrmask should be possible");
+ Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(I->getType(), NewAddrSpace);
+ Instruction *AddrSpaceCast =
+ new AddrSpaceCastInst(I, NewPtrType, "", *InsertPoint);
+ AddrSpaceCast->setDebugLoc(I->getDebugLoc());
+ return AddrSpaceCast;
+ }
+ }
+
+ IRBuilder<> B(I);
+ if (TruncateToWidth) {
+ MaskTy = B.getIntNTy(*TruncateToWidth);
+ MaskOp = B.CreateTrunc(MaskOp, MaskTy);
+ }
+ Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
+ PtrOpUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ PoisonUsesToFix);
+ return B.CreateIntrinsic(Intrinsic::ptrmask, {NewPtr->getType(), MaskTy},
+ {NewPtr, MaskOp});
+}
+
// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
@@ -666,51 +716,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Technically the intrinsic ID is a pointer typed argument, so specially
// handle calls early.
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
- const Use &PtrOpUse = II->getArgOperandUse(0);
- unsigned OldAddrSpace = PtrOpUse.get()->getType()->getPointerAddressSpace();
- Value *MaskOp = II->getArgOperand(1);
- Type *MaskTy = MaskOp->getType();
-
- bool DoTruncate = false;
- bool DoNotConvert = false;
-
- if (!TTI->isNoopAddrSpaceCast(OldAddrSpace, NewAddrSpace)) {
- // All valid 64-bit to 32-bit casts work by chopping off the high
- // bits. Any masking only clearing the low bits will also apply in the new
- // address space.
- if (DL->getPointerSizeInBits(OldAddrSpace) != 64 ||
- DL->getPointerSizeInBits(NewAddrSpace) != 32) {
- DoNotConvert = true;
- } else {
- // TODO: Do we need to thread more context in here?
- KnownBits Known = computeKnownBits(MaskOp, *DL, 0, nullptr, II);
- if (Known.countMinLeadingOnes() < 32)
- DoNotConvert = true;
- else
- DoTruncate = true;
- }
- }
- if (DoNotConvert) {
- // Leave the ptrmask as-is and insert an addrspacecast after it.
- std::optional<BasicBlock::iterator> InsertPoint =
- II->getInsertionPointAfterDef();
- assert(InsertPoint && "insertion after ptrmask should be possible");
- Instruction *AddrSpaceCast =
- new AddrSpaceCastInst(II, NewPtrType, "", *InsertPoint);
- AddrSpaceCast->setDebugLoc(II->getDebugLoc());
- return AddrSpaceCast;
- }
-
- IRBuilder<> B(II);
- if (DoTruncate) {
- MaskTy = B.getInt32Ty();
- MaskOp = B.CreateTrunc(MaskOp, MaskTy);
- }
- Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
- PtrOpUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
- PoisonUsesToFix);
- return B.CreateIntrinsic(Intrinsic::ptrmask, {NewPtr->getType(), MaskTy},
- {NewPtr, MaskOp});
+ return clonePtrMaskWithNewAddressSpace(
+ II, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
}
unsigned AS = TTI->getAssumedAddrSpace(I);