[llvm] [InferAddressSpaces] Handle unconverted ptrmask (PR #140802)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 26 05:55:33 PDT 2025
ro-i (https://github.com/ro-i) updated https://github.com/llvm/llvm-project/pull/140802
>From 0b3636f8fb2f72996427e1132455cb6e36dc7bfd Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Tue, 20 May 2025 15:55:36 -0500
Subject: [PATCH] [InferAddressSpaces] Handle unconverted ptrmask
In case a ptrmask cannot be converted to the new address space due to
an unknown mask value, this needs to be detected and an addrspacecast
needs to be inserted so that later uses of the unconverted return value
of the ptrmask are not hindered. Otherwise, users of this value would
become invalid by receiving a nullptr as an operand.
This LLVM defect was identified via the AMD Fuzzing project.
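
For illustration, a minimal sketch of the affected pattern (adapted from
the new ptrmask_cast_local_to_flat_unknown_mask test below; the function
name and the choice of addrspace(3) are only illustrative):

  define i8 @example(ptr addrspace(3) %p, i64 %mask, i64 %idx) {
    %cast = addrspacecast ptr addrspace(3) %p to ptr
    %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
    %gep = getelementptr i8, ptr %masked, i64 %idx
    %load = load i8, ptr %gep
    ret i8 %load
  }

Since %mask is arbitrary, the ptrmask cannot be rewritten to operate on
ptr addrspace(3). With this patch the ptrmask is kept in addrspace 0 and
an addrspacecast back to addrspace(3) is inserted after it, so users
such as the gep can still be rewritten instead of receiving a nullptr
operand.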
---
.../llvm/Analysis/TargetTransformInfo.h | 12 +
.../llvm/Analysis/TargetTransformInfoImpl.h | 46 ++++
llvm/lib/Analysis/TargetTransformInfo.cpp | 11 +
.../AMDGPU/AMDGPUTargetTransformInfo.cpp | 35 ---
.../Transforms/Scalar/InferAddressSpaces.cpp | 90 +++++--
.../InferAddressSpaces/AMDGPU/ptrmask.ll | 236 +++++++++++++++++-
6 files changed, 367 insertions(+), 63 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 318528021ef75..36062e69ef107 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -499,6 +499,18 @@ class TargetTransformInfo {
LLVM_ABI bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+ // For an address space cast of the given pointer value, calculate the known
+ // bits of the source pointer in the source addrspace and of the destination
+ // pointer in the destination addrspace.
+ LLVM_ABI std::pair<KnownBits, KnownBits>
+ computeKnownBitsAddrSpaceCast(unsigned ToAS, const Value &PtrOp) const;
+
+ // Given an address space cast, calculate the known bits of the resulting ptr
+ // in the destination addrspace using the known bits of the source pointer in
+ // the source addrspace.
+ LLVM_ABI KnownBits computeKnownBitsAddrSpaceCast(
+ unsigned FromAS, unsigned ToAS, const KnownBits &FromPtrBits) const;
+
/// Return true if globals in this address space can have initializers other
/// than `undef`.
LLVM_ABI bool
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index bb299becfdcba..3bc99fe0246a9 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -16,6 +16,7 @@
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -151,6 +152,51 @@ class TargetTransformInfoImplBase {
}
virtual bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; }
+
+ virtual std::pair<KnownBits, KnownBits>
+ computeKnownBitsAddrSpaceCast(unsigned ToAS, const Value &PtrOp) const {
+ const Type *PtrTy = PtrOp.getType();
+ assert(PtrTy->isPtrOrPtrVectorTy() &&
+ "expected pointer or pointer vector type");
+ unsigned FromAS = PtrTy->getPointerAddressSpace();
+
+ if (DL.isNonIntegralAddressSpace(FromAS))
+ return std::pair(KnownBits(DL.getPointerSizeInBits(FromAS)),
+ KnownBits(DL.getPointerSizeInBits(ToAS)));
+
+ KnownBits FromPtrBits;
+ if (const AddrSpaceCastInst *CastI = dyn_cast<AddrSpaceCastInst>(&PtrOp)) {
+ std::pair<KnownBits, KnownBits> KB = computeKnownBitsAddrSpaceCast(
+ CastI->getDestAddressSpace(), *CastI->getPointerOperand());
+ FromPtrBits = KB.second;
+ } else if (FromAS == 0 && PatternMatch::match(&PtrOp, PatternMatch::m_Zero())) {
+ // For addrspace 0, we know that a null pointer has the value 0.
+ FromPtrBits = KnownBits::makeConstant(
+ APInt::getZero(DL.getPointerSizeInBits(FromAS)));
+ } else {
+ FromPtrBits = computeKnownBits(&PtrOp, DL, nullptr);
+ }
+
+ KnownBits ToPtrBits =
+ computeKnownBitsAddrSpaceCast(FromAS, ToAS, FromPtrBits);
+
+ return std::pair(FromPtrBits, ToPtrBits);
+ }
+
+ virtual KnownBits
+ computeKnownBitsAddrSpaceCast(unsigned FromAS, unsigned ToAS,
+ const KnownBits &FromPtrBits) const {
+ unsigned ToASBitSize = DL.getPointerSizeInBits(ToAS);
+
+ if (DL.isNonIntegralAddressSpace(FromAS))
+ return KnownBits(ToASBitSize);
+
+ // By default, we assume that all valid "larger" (e.g. 64-bit) to "smaller"
+ // (e.g. 32-bit) casts work by chopping off the high bits.
+ // By default, we do not assume that null results in null again.
+ return FromPtrBits.anyextOrTrunc(ToASBitSize);
+ }
+
virtual bool
canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const {
return AS == 0;
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index b9dec59a1ecad..1b2d35ad67eb1 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -330,6 +330,17 @@ bool TargetTransformInfo::isNoopAddrSpaceCast(unsigned FromAS,
return TTIImpl->isNoopAddrSpaceCast(FromAS, ToAS);
}
+std::pair<KnownBits, KnownBits>
+TargetTransformInfo::computeKnownBitsAddrSpaceCast(unsigned ToAS,
+ const Value &PtrOp) const {
+ return TTIImpl->computeKnownBitsAddrSpaceCast(ToAS, PtrOp);
+}
+
+KnownBits TargetTransformInfo::computeKnownBitsAddrSpaceCast(
+ unsigned FromAS, unsigned ToAS, const KnownBits &FromPtrBits) const {
+ return TTIImpl->computeKnownBitsAddrSpaceCast(FromAS, ToAS, FromPtrBits);
+}
+
bool TargetTransformInfo::canHaveNonUndefGlobalInitializerInAddressSpace(
unsigned AS) const {
return TTIImpl->canHaveNonUndefGlobalInitializerInAddressSpace(AS);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 03d16fdd54c42..558b7c2491e8b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -1150,41 +1150,6 @@ Value *GCNTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
return NewVal;
}
- case Intrinsic::ptrmask: {
- unsigned OldAS = OldV->getType()->getPointerAddressSpace();
- unsigned NewAS = NewV->getType()->getPointerAddressSpace();
- Value *MaskOp = II->getArgOperand(1);
- Type *MaskTy = MaskOp->getType();
-
- bool DoTruncate = false;
-
- const GCNTargetMachine &TM =
- static_cast<const GCNTargetMachine &>(getTLI()->getTargetMachine());
- if (!TM.isNoopAddrSpaceCast(OldAS, NewAS)) {
- // All valid 64-bit to 32-bit casts work by chopping off the high
- // bits. Any masking only clearing the low bits will also apply in the new
- // address space.
- if (DL.getPointerSizeInBits(OldAS) != 64 ||
- DL.getPointerSizeInBits(NewAS) != 32)
- return nullptr;
-
- // TODO: Do we need to thread more context in here?
- KnownBits Known = computeKnownBits(MaskOp, DL, nullptr, II);
- if (Known.countMinLeadingOnes() < 32)
- return nullptr;
-
- DoTruncate = true;
- }
-
- IRBuilder<> B(II);
- if (DoTruncate) {
- MaskTy = B.getInt32Ty();
- MaskOp = B.CreateTrunc(MaskOp, MaskTy);
- }
-
- return B.CreateIntrinsic(Intrinsic::ptrmask, {NewV->getType(), MaskTy},
- {NewV, MaskOp});
- }
case Intrinsic::amdgcn_flat_atomic_fmax_num:
case Intrinsic::amdgcn_flat_atomic_fmin_num: {
Type *DestTy = II->getType();
diff --git a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
index 3ad87545953ff..b35e868669b7b 100644
--- a/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
+++ b/llvm/lib/Transforms/Scalar/InferAddressSpaces.cpp
@@ -206,6 +206,12 @@ class InferAddressSpacesImpl {
bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const;
+ Value *clonePtrMaskWithNewAddressSpace(
+ IntrinsicInst *I, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const;
+
Value *cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
@@ -651,6 +657,69 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
return PoisonValue::get(NewPtrTy);
}
+// A helper function for cloneInstructionWithNewAddressSpace. Handles the
+// conversion of a ptrmask intrinsic instruction.
+Value *InferAddressSpacesImpl::clonePtrMaskWithNewAddressSpace(
+ IntrinsicInst *I, unsigned NewAddrSpace,
+ const ValueToValueMapTy &ValueWithNewAddrSpace,
+ const PredicatedAddrSpaceMapTy &PredicatedAS,
+ SmallVectorImpl<const Use *> *PoisonUsesToFix) const {
+ const Use &PtrOpUse = I->getArgOperandUse(0);
+ unsigned OldAddrSpace = PtrOpUse->getType()->getPointerAddressSpace();
+ Value *MaskOp = I->getArgOperand(1);
+ Type *MaskTy = MaskOp->getType();
+
+ std::optional<KnownBits> OldPtrBits;
+ std::optional<KnownBits> NewPtrBits;
+ if (!TTI->isNoopAddrSpaceCast(OldAddrSpace, NewAddrSpace)) {
+ if (std::optional<std::pair<KnownBits, KnownBits>> KB =
+ TTI->computeKnownBitsAddrSpaceCast(NewAddrSpace, *PtrOpUse.get())) {
+ OldPtrBits = KB->first;
+ NewPtrBits = KB->second;
+ }
+ }
+
+ // If the pointers in both addrspaces have a bitwise representation and if the
+ // representation of the new pointer is smaller (fewer bits) than the old one,
+ // check if the mask is applicable to the ptr in the new addrspace. Any
+ // masking only clearing the low bits will also apply in the new addrspace.
+ // Note: checking if the mask clears high bits is not sufficient as those
+ // might have already been 0 in the old ptr.
+ if (NewPtrBits && OldPtrBits->getBitWidth() > NewPtrBits->getBitWidth()) {
+ KnownBits MaskBits =
+ computeKnownBits(MaskOp, *DL, /*AssumptionCache=*/nullptr, I);
+ // Set all unknown bits of the old ptr to 1, so that we are conservative in
+ // checking which bits are cleared by the mask.
+ OldPtrBits->One |= ~OldPtrBits->Zero;
+ // Check which bits are cleared by the mask in the old ptr.
+ KnownBits ClearedBits = KnownBits::sub(*OldPtrBits, *OldPtrBits & MaskBits);
+
+ // If the mask isn't applicable to the new ptr, leave the ptrmask as-is and
+ // insert an addrspacecast after it.
+ if (ClearedBits.countMaxActiveBits() > NewPtrBits->countMaxActiveBits()) {
+ std::optional<BasicBlock::iterator> InsertPoint =
+ I->getInsertionPointAfterDef();
+ assert(InsertPoint && "insertion after ptrmask should be possible");
+ Type *NewPtrType = getPtrOrVecOfPtrsWithNewAS(I->getType(), NewAddrSpace);
+ Instruction *AddrSpaceCast =
+ new AddrSpaceCastInst(I, NewPtrType, "", *InsertPoint);
+ AddrSpaceCast->setDebugLoc(I->getDebugLoc());
+ return AddrSpaceCast;
+ }
+ }
+
+ IRBuilder<> B(I);
+ if (NewPtrBits) {
+ MaskTy = MaskTy->getWithNewBitWidth(NewPtrBits->getBitWidth());
+ MaskOp = B.CreateTrunc(MaskOp, MaskTy);
+ }
+ Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
+ PtrOpUse, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS,
+ PoisonUsesToFix);
+ return B.CreateIntrinsic(Intrinsic::ptrmask, {NewPtr->getType(), MaskTy},
+ {NewPtr, MaskOp});
+}
+
// Returns a clone of `I` with its operands converted to those specified in
// ValueWithNewAddrSpace. Due to potential cycles in the data flow graph, an
// operand whose address space needs to be modified might not exist in
@@ -660,9 +729,6 @@ static Value *operandWithNewAddressSpaceOrCreatePoison(
// Note that we do not necessarily clone `I`, e.g., if it is an addrspacecast
// from a pointer whose type already matches. Therefore, this function returns a
// Value* instead of an Instruction*.
-//
-// This may also return nullptr in the case the instruction could not be
-// rewritten.
Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
Instruction *I, unsigned NewAddrSpace,
const ValueToValueMapTy &ValueWithNewAddrSpace,
@@ -683,17 +749,8 @@ Value *InferAddressSpacesImpl::cloneInstructionWithNewAddressSpace(
// Technically the intrinsic ID is a pointer typed argument, so specially
// handle calls early.
assert(II->getIntrinsicID() == Intrinsic::ptrmask);
- Value *NewPtr = operandWithNewAddressSpaceOrCreatePoison(
- II->getArgOperandUse(0), NewAddrSpace, ValueWithNewAddrSpace,
- PredicatedAS, PoisonUsesToFix);
- Value *Rewrite =
- TTI->rewriteIntrinsicWithAddressSpace(II, II->getArgOperand(0), NewPtr);
- if (Rewrite) {
- assert(Rewrite != II && "cannot modify this pointer operation in place");
- return Rewrite;
- }
-
- return nullptr;
+ return clonePtrMaskWithNewAddressSpace(
+ II, NewAddrSpace, ValueWithNewAddrSpace, PredicatedAS, PoisonUsesToFix);
}
unsigned AS = TTI->getAssumedAddrSpace(I);
@@ -1331,7 +1388,10 @@ bool InferAddressSpacesImpl::rewriteWithNewAddressSpaces(
unsigned OperandNo = PoisonUse->getOperandNo();
assert(isa<PoisonValue>(NewV->getOperand(OperandNo)));
- NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(PoisonUse->get()));
+ WeakTrackingVH NewOp = ValueWithNewAddrSpace.lookup(PoisonUse->get());
+ assert(NewOp &&
+ "poison replacements in ValueWithNewAddrSpace shouldn't be null");
+ NewV->setOperand(OperandNo, NewOp);
}
SmallVector<Instruction *, 16> DeadInstructions;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index 6ef926f935830..644b8cdd0d2e4 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -3,9 +3,10 @@
define i8 @ptrmask_cast_local_to_flat(ptr addrspace(3) %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -14,11 +15,49 @@ define i8 @ptrmask_cast_local_to_flat(ptr addrspace(3) %src.ptr, i64 %mask) {
ret i8 %load
}
+define <3 x ptr addrspace(3)> @ptrmask_vector_cast_local_to_flat(<3 x ptr addrspace(3)> %src.ptr, <3 x i64> %mask) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <3 x ptr addrspace(3)> [[SRC_PTR:%.*]] to <3 x ptr>
+; CHECK-NEXT: [[MASKED:%.*]] = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> [[CAST]], <3 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast <3 x ptr> [[MASKED]] to <3 x ptr addrspace(3)>
+; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[TMP1]]
+;
+ %cast = addrspacecast <3 x ptr addrspace(3)> %src.ptr to <3 x ptr>
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> %cast, <3 x i64> %mask)
+ %cast2 = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ ret <3 x ptr addrspace(3)> %cast2
+}
+
+; Casting null does not necessarily result in null again.
+define i8 @ptrmask_cast_local_null_to_flat(i64 %mask) {
+; CHECK-LABEL: @ptrmask_cast_local_null_to_flat(
+; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) null to ptr), i64 [[MASK:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) null to ptr), i64 %mask)
+ %load = load i8, ptr %masked
+ ret i8 %load
+}
+
+define <3 x ptr addrspace(3)> @ptrmask_vector_cast_local_null_to_flat(<3 x i64> %mask) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_null_to_flat(
+; CHECK-NEXT: [[MASKED:%.*]] = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> <ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(3) null to ptr), ptr addrspacecast (ptr addrspace(3) null to ptr)>, <3 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <3 x ptr> [[MASKED]] to <3 x ptr addrspace(3)>
+; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[CAST]]
+;
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> addrspacecast (<3 x ptr addrspace(3)> <ptr addrspace(3) null, ptr addrspace(3) null, ptr addrspace(3) null> to <3 x ptr>), <3 x i64> %mask)
+ %cast = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ ret <3 x ptr addrspace(3)> %cast
+}
+
define i8 @ptrmask_cast_private_to_flat(ptr addrspace(5) %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_private_to_flat(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(5) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(5)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(5) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(5) %src.ptr to ptr
@@ -29,9 +68,10 @@ define i8 @ptrmask_cast_private_to_flat(ptr addrspace(5) %src.ptr, i64 %mask) {
define i8 @ptrmask_cast_region_to_flat(ptr addrspace(2) %src.ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_region_to_flat(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(2)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(2) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(2) %src.ptr to ptr
@@ -77,6 +117,46 @@ define i8 @ptrmask_cast_flat_to_local(ptr %ptr, i64 %mask) {
ret i8 %load
}
+define <3 x ptr addrspace(3)> @ptrmask_vector_cast_flat_to_local(<3 x ptr> %src.ptr, <3 x i64> %mask) {
+; CHECK-LABEL: @ptrmask_vector_cast_flat_to_local(
+; CHECK-NEXT: [[SRC_PTR:%.*]] = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> [[SRC_PTR1:%.*]], <3 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <3 x ptr> [[SRC_PTR]] to <3 x ptr addrspace(3)>
+; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[CAST]]
+;
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> %src.ptr, <3 x i64> %mask)
+ %cast = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ ret <3 x ptr addrspace(3)> %cast
+}
+
+; Casting null *does* result in null again if addrspace 0 is cast to a
+; smaller addrspace (by default we assume that casting to a smaller
+; addrspace truncates).
+define i8 @ptrmask_cast_flat_null_to_local(i64 %mask) {
+; CHECK-LABEL: @ptrmask_cast_flat_null_to_local(
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr null, i64 %mask)
+ %cast = addrspacecast ptr %masked to ptr addrspace(3)
+ %load = load i8, ptr addrspace(3) %cast
+ ret i8 %load
+}
+
+define i8 @ptrmask_vector_cast_flat_null_to_local(<3 x i64> %mask, i32 %ptridx, i32 %idx) {
+; CHECK-LABEL: @ptrmask_vector_cast_flat_null_to_local(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(3) addrspacecast (ptr null to ptr addrspace(3)), i32 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> <ptr null, ptr null, ptr null>, <3 x i64> %mask)
+ %cast = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ %ptr = extractelement <3 x ptr addrspace(3)> %cast, i32 %ptridx
+ %gep = getelementptr i8, ptr addrspace(3) %ptr, i32 %idx
+ %load = load i8, ptr addrspace(3) %gep
+ ret i8 %load
+}
+
+
define i8 @ptrmask_cast_flat_to_private(ptr %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_flat_to_private(
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
@@ -109,7 +189,8 @@ define i8 @ptrmask_cast_flat_to_global(ptr %ptr, i64 %mask) {
define i8 @ptrmask_cast_local_to_flat_global(i64 %mask) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_global(
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) @lds0 to ptr), i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%masked = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) @lds0 to ptr), i64 %mask)
@@ -150,7 +231,8 @@ define i8 @multi_ptrmask_cast_local_to_flat(ptr addrspace(3) %src.ptr, i64 %mask
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
; CHECK-NEXT: [[LOAD0:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -167,7 +249,8 @@ define i8 @multi_ptrmask_cast_region_to_flat(ptr addrspace(2) %src.ptr, i64 %mas
; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(2) [[SRC_PTR:%.*]] to ptr
; CHECK-NEXT: [[LOAD0:%.*]] = load i8, ptr addrspace(2) [[SRC_PTR]], align 1
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
-; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(2)
+; CHECK-NEXT: [[LOAD1:%.*]] = load i8, ptr addrspace(2) [[TMP1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[LOAD0]], [[LOAD1]]
; CHECK-NEXT: ret i8 [[ADD]]
;
@@ -182,9 +265,10 @@ define i8 @multi_ptrmask_cast_region_to_flat(ptr addrspace(2) %src.ptr, i64 %mas
; Do not fold this since it clears a single high bit.
define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 -4294967297)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -193,12 +277,26 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffeffffffff(ptr addrspace(
ret i8 %load
}
+define <3 x ptr addrspace(3)> @ptrmask_vector_cast_local_to_flat_const_mask_fffffffeffffffff(<3 x ptr addrspace(3)> %src.ptr) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat_const_mask_fffffffeffffffff(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <3 x ptr addrspace(3)> [[SRC_PTR:%.*]] to <3 x ptr>
+; CHECK-NEXT: [[MASKED:%.*]] = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> [[CAST]], <3 x i64> splat (i64 -4294967297))
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast <3 x ptr> [[MASKED]] to <3 x ptr addrspace(3)>
+; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[TMP1]]
+;
+ %cast = addrspacecast <3 x ptr addrspace(3)> %src.ptr to <3 x ptr>
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> %cast, <3 x i64> <i64 -4294967297, i64 -4294967297, i64 -4294967297>)
+ %cast2 = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ ret <3 x ptr addrspace(3)> %cast2
+}
+
; Do not fold this since it clears a single high bit.
define i8 @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR1:%.*]] to ptr
; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 9223372036854775807)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
+; CHECK-NEXT: [[SRC_PTR:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -207,6 +305,20 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_7fffffffffffffff(ptr addrspace(
ret i8 %load
}
+; Do not fold: casting null does not necessarily result in null again.
+define i8 @ptrmask_cast_local_null_to_flat_const_mask_7fffffffffffffff() {
+; CHECK-LABEL: @ptrmask_cast_local_null_to_flat_const_mask_7fffffffffffffff(
+; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr addrspacecast (ptr addrspace(3) null to ptr), i64 9223372036854775807)
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) null to ptr
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 9223372036854775807)
+ %load = load i8, ptr %masked
+ ret i8 %load
+}
+
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000(
; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 0)
@@ -219,6 +331,28 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffff00000000(ptr addrspace(
ret i8 %load
}
+define <3 x ptr addrspace(3)> @ptrmask_vector_cast_local_to_flat_const_mask_ffffffff00000000(<3 x ptr addrspace(3)> %src.ptr) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat_const_mask_ffffffff00000000(
+; CHECK-NEXT: [[TMP1:%.*]] = call <3 x ptr addrspace(3)> @llvm.ptrmask.v3p3.v3i32(<3 x ptr addrspace(3)> [[SRC_PTR:%.*]], <3 x i32> zeroinitializer)
+; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[TMP1]]
+;
+ %cast = addrspacecast <3 x ptr addrspace(3)> %src.ptr to <3 x ptr>
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> %cast, <3 x i64> <i64 -4294967296, i64 -4294967296, i64 -4294967296>)
+ %cast2 = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ ret <3 x ptr addrspace(3)> %cast2
+}
+
+define i8 @ptrmask_cast_local_null_to_flat_const_mask_ffffffff00000000() {
+; CHECK-LABEL: @ptrmask_cast_local_null_to_flat_const_mask_ffffffff00000000(
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) null, align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) null to ptr
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 -4294967296)
+ %load = load i8, ptr %masked
+ ret i8 %load
+}
+
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffff80000000(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffff80000000(
; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -2147483648)
@@ -244,6 +378,17 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffff0000(ptr addrspace(
ret i8 %load
}
+define <3 x ptr addrspace(3)> @ptrmask_vector_cast_local_to_flat_const_mask_ffffffffffff0000(<3 x ptr addrspace(3)> %src.ptr) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat_const_mask_ffffffffffff0000(
+; CHECK-NEXT: [[TMP1:%.*]] = call <3 x ptr addrspace(3)> @llvm.ptrmask.v3p3.v3i32(<3 x ptr addrspace(3)> [[SRC_PTR:%.*]], <3 x i32> splat (i32 -65536))
+; CHECK-NEXT: ret <3 x ptr addrspace(3)> [[TMP1]]
+;
+ %cast = addrspacecast <3 x ptr addrspace(3)> %src.ptr to <3 x ptr>
+ %masked = call <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr> %cast, <3 x i64> <i64 -65536, i64 -65536, i64 -65536>)
+ %cast2 = addrspacecast <3 x ptr> %masked to <3 x ptr addrspace(3)>
+ ret <3 x ptr addrspace(3)> %cast2
+}
+
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffff00(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffff00(
; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -256)
@@ -343,7 +488,72 @@ define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr,
ret i8 %load
}
+define <2 x ptr addrspace(3)> @ptrmask_vector_cast_local_to_flat_load_range_mask(<2 x ptr addrspace(3)> %src.ptr, ptr addrspace(1) %mask.ptr) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat_load_range_mask(
+; CHECK-NEXT: [[LOAD_MASK:%.*]] = load <2 x i64>, ptr addrspace(1) [[MASK_PTR:%.*]], align 16, !range [[RNG0]]
+; CHECK-NEXT: [[TMP1:%.*]] = trunc <2 x i64> [[LOAD_MASK]] to <2 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x ptr addrspace(3)> @llvm.ptrmask.v2p3.v2i32(<2 x ptr addrspace(3)> [[SRC_PTR:%.*]], <2 x i32> [[TMP1]])
+; CHECK-NEXT: ret <2 x ptr addrspace(3)> [[TMP2]]
+;
+ %load.mask = load <2 x i64>, ptr addrspace(1) %mask.ptr, align 16, !range !0
+ %cast = addrspacecast <2 x ptr addrspace(3)> %src.ptr to <2 x ptr>
+ %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %cast, <2 x i64> %load.mask)
+ %cast2 = addrspacecast <2 x ptr> %masked to <2 x ptr addrspace(3)>
+ ret <2 x ptr addrspace(3)> %cast2
+}
+
+; Non-const masks with no known range should not prevent other ptr-manipulating
+; instructions (such as gep) from being converted.
+define i8 @ptrmask_cast_local_to_flat_unknown_mask(ptr addrspace(3) %src.ptr, i64 %mask, i64 %idx) {
+; CHECK-LABEL: @ptrmask_cast_local_to_flat_unknown_mask(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
+; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i64 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
+ %gep = getelementptr i8, ptr %masked, i64 %idx
+ %load = load i8, ptr %gep
+ ret i8 %load
+}
+
+define <2 x ptr addrspace(3)> @ptrmask_vector_cast_local_to_flat_unknown_mask(<2 x ptr addrspace(3)> %src.ptr, <2 x i64> %mask) {
+; CHECK-LABEL: @ptrmask_vector_cast_local_to_flat_unknown_mask(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast <2 x ptr addrspace(3)> [[SRC_PTR:%.*]] to <2 x ptr>
+; CHECK-NEXT: [[MASKED:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[CAST]], <2 x i64> [[MASK:%.*]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast <2 x ptr> [[MASKED]] to <2 x ptr addrspace(3)>
+; CHECK-NEXT: ret <2 x ptr addrspace(3)> [[TMP1]]
+;
+ %cast = addrspacecast <2 x ptr addrspace(3)> %src.ptr to <2 x ptr>
+ %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %cast, <2 x i64> %mask)
+ %cast2 = addrspacecast <2 x ptr> %masked to <2 x ptr addrspace(3)>
+ ret <2 x ptr addrspace(3)> %cast2
+}
+
+define i8 @interleaved_ptrmask_cast_local_to_flat_unknown_mask(ptr addrspace(3) %src.ptr, i64 %mask, i64 %idx) {
+; CHECK-LABEL: @interleaved_ptrmask_cast_local_to_flat_unknown_mask(
+; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[GEP1:%.*]] to ptr
+; CHECK-NEXT: store i64 [[MASK:%.*]], ptr addrspace(3) [[GEP1]], align 8
+; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[CAST]], i64 [[MASK]])
+; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[MASKED]] to ptr addrspace(3)
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, ptr addrspace(3) [[TMP1]], i64 [[IDX:%.*]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[GEP]], align 1
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
+ store i64 %mask, ptr %cast
+ %masked = call ptr @llvm.ptrmask.p0.i64(ptr %cast, i64 %mask)
+ %gep = getelementptr i8, ptr %masked, i64 %idx
+ %load = load i8, ptr %gep
+ ret i8 %load
+}
+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64) #0
+declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>) #0
+declare <3 x ptr> @llvm.ptrmask.v3p0.v3i64(<3 x ptr>, <3 x i64>) #0
declare ptr addrspace(5) @llvm.ptrmask.p5.i32(ptr addrspace(5), i32) #0
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0