[llvm] [IR] Require that ptrmask mask matches pointer index size (PR #69343)
Nikita Popov via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 19 00:43:40 PDT 2023
https://github.com/nikic updated https://github.com/llvm/llvm-project/pull/69343
>From cae33d16bde61ddaa675cf57a9246601a2cb5a1a Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Tue, 17 Oct 2023 17:04:41 +0200
Subject: [PATCH 1/2] [IR] Require that ptrmask mask matches pointer index size
Currently, the ptrmask intrinsic allows the mask to have any
size and will zero-extend or truncate it to the pointer size.
However, per the specified semantics, what we would actually
need to do is to first zero-extend or truncate it to the pointer
index size and then 1-extend it to the pointer size. This seems
to leave a lot of room for error, so this patch proposes to
make the intrinsic stricter:
It now requires that the mask type matches the pointer index type
-- a zext or trunc can be done explicitly in IR and should not
be part of the intrinsic. Also spell out that the mask is 1-extended
to the pointer size if we're talking about the integer representation
(this is implied by the GEP expansion).
---
llvm/docs/LangRef.rst | 21 ++-
llvm/lib/Analysis/ValueTracking.cpp | 4 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 7 +-
llvm/lib/IR/Verifier.cpp | 4 +
.../InstCombine/InstCombineCalls.cpp | 13 +-
llvm/test/CodeGen/AArch64/lower-ptrmask.ll | 15 --
.../AMDGPU/GlobalISel/irtranslator-ptrmask.ll | 108 ------------
llvm/test/CodeGen/AMDGPU/ptrmask.ll | 163 +-----------------
llvm/test/CodeGen/X86/lower-ptrmask.ll | 16 --
.../InferAddressSpaces/AMDGPU/ptrmask.ll | 16 --
.../test/Transforms/InferAlignment/ptrmask.ll | 28 +++
.../test/Transforms/InstCombine/align-addr.ll | 28 ---
.../InstCombine/consecutive-ptrmask.ll | 52 ++----
llvm/test/Verifier/ptrmask.ll | 18 ++
14 files changed, 96 insertions(+), 397 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2035091be5a6840..53dc00a41e01e96 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26952,7 +26952,8 @@ Arguments:
""""""""""
The first argument is a pointer or vector of pointers. The second argument is
-an integer or vector of integers.
+an integer or vector of integers with the same bit width as the index type
+size of the first argument.
Overview:
""""""""""
@@ -26965,10 +26966,20 @@ to facilitate alias analysis and underlying-object detection.
Semantics:
""""""""""
-The result of ``ptrmask(ptr, mask)`` is equivalent to
-``getelementptr ptr, (ptrtoint(ptr) & mask) - ptrtoint(ptr)``. Both the returned
-pointer(s) and the first argument are based on the same underlying object (for more
-information on the *based on* terminology see
+The result of ``ptrmask(%ptr, %mask)`` is equivalent to the following expansion,
+where ``iPtrIdx`` is the integer type whose width is the index size of the pointer::
+
+ %intptr = ptrtoint ptr %ptr to iPtrIdx ; this may truncate
+ %masked = and iPtrIdx %intptr, %mask
+ %diff = sub iPtrIdx %masked, %intptr
+ %result = getelementptr i8, ptr %ptr, iPtrIdx %diff
+
+Considering this as an operation on the integer representation of the pointer,
+if the pointer index type size is smaller than the pointer type size, this
+implies that the mask is extended with 1 bits to the pointer type size.
+
+Both the returned pointer(s) and the first argument are based on the same
+underlying object (for more information on the *based on* terminology see
:ref:`the pointer aliasing rules <pointeraliasing>`). If the bitwidth of the
mask argument does not match the pointer size of the target, the mask is
zero-extended or truncated accordingly.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 1e0281b3f1bd79e..9925fdabb5aa993 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -1637,8 +1637,8 @@ static void computeKnownBitsFromOperator(const Operator *I,
const Value *Mask = I->getOperand(1);
Known2 = KnownBits(Mask->getType()->getScalarSizeInBits());
computeKnownBits(Mask, Known2, Depth + 1, Q);
- // This is basically a pointer typed and.
- Known &= Known2.zextOrTrunc(Known.getBitWidth());
+ // TODO: 1-extend would be more precise.
+ Known &= Known2.anyextOrTrunc(BitWidth);
break;
}
case Intrinsic::x86_sse42_crc32_64_64:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 4bb0ba6f083109b..4858e17e2649439 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7420,11 +7420,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
case Intrinsic::ptrmask: {
SDValue Ptr = getValue(I.getOperand(0));
- SDValue Const = getValue(I.getOperand(1));
+ SDValue Mask = getValue(I.getOperand(1));
EVT PtrVT = Ptr.getValueType();
- setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr,
- DAG.getZExtOrTrunc(Const, sdl, PtrVT)));
+ assert(PtrVT == Mask.getValueType() &&
+ "Pointers with different index type are not supported by SDAG");
+ setValue(&I, DAG.getNode(ISD::AND, sdl, PtrVT, Ptr, Mask));
return;
}
case Intrinsic::threadlocal_address: {
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 5a3328416db3eb0..6059baf75832863 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5964,6 +5964,10 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"llvm.ptrmask intrinsic arguments must have the same number of "
"elements",
&Call);
+ Check(DL.getIndexTypeSizeInBits(Ty0) == Ty1->getScalarSizeInBits(),
+ "llvm.ptrmask intrinsic second argument bitwidth must match "
+ "pointer index type size of first argument",
+ &Call);
break;
}
};
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index e29fb869686ca0b..11b0083828bf6f6 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1966,13 +1966,12 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
if (match(II->getArgOperand(0),
m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
m_Value(InnerMask))))) {
- if (II->getArgOperand(1)->getType() == InnerMask->getType()) {
- Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
- return replaceInstUsesWith(
- *II,
- Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
- {InnerPtr, NewMask}));
- }
+ assert(II->getArgOperand(1)->getType() == InnerMask->getType() &&
+ "Mask types must match");
+ Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
+ return replaceInstUsesWith(
+ *II, Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
+ {InnerPtr, NewMask}));
}
break;
}
diff --git a/llvm/test/CodeGen/AArch64/lower-ptrmask.ll b/llvm/test/CodeGen/AArch64/lower-ptrmask.ll
index aceabf27d083f94..30dacea4286af73 100644
--- a/llvm/test/CodeGen/AArch64/lower-ptrmask.ll
+++ b/llvm/test/CodeGen/AArch64/lower-ptrmask.ll
@@ -12,18 +12,3 @@ define ptr @test1(ptr %src) {
%ptr = call ptr @llvm.ptrmask.p0.i64(ptr %src, i64 72057594037927928)
ret ptr %ptr
}
-
-declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
-
-; CHECK-LABEL: name: test2
-; CHECK: %0:gpr64 = COPY $x0
-; CHECK-NEXT: %1:gpr32 = MOVi32imm 10000
-; CHECK-NEXT: %2:gpr64 = SUBREG_TO_REG 0, killed %1, %subreg.sub_32
-; CHECK-NEXT: %3:gpr64 = ANDXrr %0, killed %2
-; CHECK-NEXT: $x0 = COPY %3
-; CHECK-NEXT: RET_ReallyLR implicit $x0
-
-define ptr @test2(ptr %src) {
- %ptr = call ptr @llvm.ptrmask.p0.i32(ptr %src, i32 10000)
- ret ptr %ptr
-}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
index 8eb0658f8023b1d..7a8e521817a37f4 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-ptrmask.ll
@@ -21,78 +21,6 @@ define ptr @ptrmask_flat_i64(ptr %ptr, i64 %mask) {
ret ptr %masked
}
-define ptr @ptrmask_flat_i32(ptr %ptr, i32 %mask) {
- ; CHECK-LABEL: name: ptrmask_flat_i32
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[COPY2]](s32)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
- %masked = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 %mask)
- ret ptr %masked
-}
-
-define ptr @ptrmask_flat_i16(ptr %ptr, i16 %mask) {
- ; CHECK-LABEL: name: ptrmask_flat_i16
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s16)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
- %masked = call ptr @llvm.ptrmask.p0.i16(ptr %ptr, i16 %mask)
- ret ptr %masked
-}
-
-define ptr @ptrmask_flat_i1(ptr %ptr, i1 %mask) {
- ; CHECK-LABEL: name: ptrmask_flat_i1
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY2]](s32)
- ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p0) = G_PTRMASK [[MV]], [[TRUNC]](s1)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[PTRMASK]](p0)
- ; CHECK-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; CHECK-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1
- %masked = call ptr @llvm.ptrmask.p0.i1(ptr %ptr, i1 %mask)
- ret ptr %masked
-}
-
-define ptr addrspace(3) @ptrmask_local_i64(ptr addrspace(3) %ptr, i64 %mask) {
- ; CHECK-LABEL: name: ptrmask_local_i64
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
- ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[MV]](s64)
- ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask)
- ret ptr addrspace(3) %masked
-}
-
define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) {
; CHECK-LABEL: name: ptrmask_local_i32
; CHECK: bb.1 (%ir-block.0):
@@ -107,36 +35,6 @@ define ptr addrspace(3) @ptrmask_local_i32(ptr addrspace(3) %ptr, i32 %mask) {
ret ptr addrspace(3) %masked
}
-define ptr addrspace(3) @ptrmask_local_i16(ptr addrspace(3) %ptr, i16 %mask) {
- ; CHECK-LABEL: name: ptrmask_local_i16
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s16)
- ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask)
- ret ptr addrspace(3) %masked
-}
-
-define ptr addrspace(3) @ptrmask_local_i1(ptr addrspace(3) %ptr, i1 %mask) {
- ; CHECK-LABEL: name: ptrmask_local_i1
- ; CHECK: bb.1 (%ir-block.0):
- ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[PTRMASK:%[0-9]+]]:_(p3) = G_PTRMASK [[COPY]], [[TRUNC]](s1)
- ; CHECK-NEXT: $vgpr0 = COPY [[PTRMASK]](p3)
- ; CHECK-NEXT: SI_RETURN implicit $vgpr0
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3) %ptr, i1 %mask)
- ret ptr addrspace(3) %masked
-}
-
; Seems to not work
; define <2 x ptr> @ptrmask_flat_i64_v2(<2 x ptr> %ptr, <2 x i64> %mask) {
; %masked = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> %mask)
@@ -144,10 +42,4 @@ define ptr addrspace(3) @ptrmask_local_i1(ptr addrspace(3) %ptr, i1 %mask) {
; }
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
-declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
-declare ptr @llvm.ptrmask.p0.i16(ptr, i16)
-declare ptr @llvm.ptrmask.p0.i1(ptr, i1)
-declare ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3), i64)
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32)
-declare ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3), i16)
-declare ptr addrspace(3) @llvm.ptrmask.p3.i1(ptr addrspace(3), i1)
diff --git a/llvm/test/CodeGen/AMDGPU/ptrmask.ll b/llvm/test/CodeGen/AMDGPU/ptrmask.ll
index c69c55338f84d08..70622706789331f 100644
--- a/llvm/test/CodeGen/AMDGPU/ptrmask.ll
+++ b/llvm/test/CodeGen/AMDGPU/ptrmask.ll
@@ -21,71 +21,6 @@ define ptr addrspace(1) @v_ptrmask_global_variable_i64(ptr addrspace(1) %ptr, i6
ret ptr addrspace(1) %masked
}
-define ptr addrspace(1) @v_ptrmask_global_variable_i32(ptr addrspace(1) %ptr, i32 %mask) {
-; GCN-LABEL: v_ptrmask_global_variable_i32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v0, v0, v2
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_ptrmask_global_variable_i32:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_and_b32_e32 v0, v0, v2
-; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_ptrmask_global_variable_i32:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v2
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %ptr, i32 %mask)
- ret ptr addrspace(1) %masked
-}
-
-define ptr addrspace(1) @v_ptrmask_global_variable_i16(ptr addrspace(1) %ptr, i16 %mask) {
-; GCN-LABEL: v_ptrmask_global_variable_i16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GCN-NEXT: v_mov_b32_e32 v1, 0
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_ptrmask_global_variable_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: v_mov_b32_e32 v1, 0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_ptrmask_global_variable_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v2
-; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i16(ptr addrspace(1) %ptr, i16 %mask)
- ret ptr addrspace(1) %masked
-}
-
-define ptr addrspace(3) @v_ptrmask_local_variable_i64(ptr addrspace(3) %ptr, i64 %mask) {
-; GCN-LABEL: v_ptrmask_local_variable_i64:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_and_b32_e32 v0, v0, v1
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10PLUS-LABEL: v_ptrmask_local_variable_i64:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10PLUS-NEXT: v_and_b32_e32 v0, v0, v1
-; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask)
- ret ptr addrspace(3) %masked
-}
-
define ptr addrspace(3) @v_ptrmask_local_variable_i32(ptr addrspace(3) %ptr, i32 %mask) {
; GCN-LABEL: v_ptrmask_local_variable_i32:
; GCN: ; %bb.0:
@@ -102,29 +37,6 @@ define ptr addrspace(3) @v_ptrmask_local_variable_i32(ptr addrspace(3) %ptr, i32
ret ptr addrspace(3) %masked
}
-define ptr addrspace(3) @v_ptrmask_local_variable_i16(ptr addrspace(3) %ptr, i16 %mask) {
-; GCN-LABEL: v_ptrmask_local_variable_i16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GCN-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX10-LABEL: v_ptrmask_local_variable_i16:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-NEXT: v_and_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
-; GFX10-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-LABEL: v_ptrmask_local_variable_i16:
-; GFX11: ; %bb.0:
-; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX11-NEXT: v_and_b32_e32 v0, v0, v1
-; GFX11-NEXT: s_setpc_b64 s[30:31]
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask)
- ret ptr addrspace(3) %masked
-}
-
define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i64(ptr addrspace(1) inreg %ptr, i64 inreg %mask) {
; GCN-LABEL: s_ptrmask_global_variable_i64:
; GCN: ; %bb.0:
@@ -139,58 +51,6 @@ define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i64(ptr addrspace(1
ret ptr addrspace(1) %masked
}
-define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i32(ptr addrspace(1) inreg %ptr, i32 inreg %mask) {
-; GCN-LABEL: s_ptrmask_global_variable_i32:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_mov_b32 s5, 0
-; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
-; GCN-NEXT: s_mov_b32 s1, 0
-; GCN-NEXT: ; return to shader part epilog
-;
-; GFX10PLUS-LABEL: s_ptrmask_global_variable_i32:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_mov_b32 s5, 0
-; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[4:5]
-; GFX10PLUS-NEXT: s_mov_b32 s1, 0
-; GFX10PLUS-NEXT: ; return to shader part epilog
- %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %ptr, i32 %mask)
- ret ptr addrspace(1) %masked
-}
-
-define amdgpu_ps ptr addrspace(1) @s_ptrmask_global_variable_i16(ptr addrspace(1) inreg %ptr, i16 inreg %mask) {
-; GCN-LABEL: s_ptrmask_global_variable_i16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_and_b32 s0, s4, 0xffff
-; GCN-NEXT: s_mov_b32 s1, 0
-; GCN-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; GCN-NEXT: s_mov_b32 s1, 0
-; GCN-NEXT: ; return to shader part epilog
-;
-; GFX10PLUS-LABEL: s_ptrmask_global_variable_i16:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_mov_b32 s1, 0
-; GFX10PLUS-NEXT: s_and_b32 s0, s4, 0xffff
-; GFX10PLUS-NEXT: s_and_b64 s[0:1], s[2:3], s[0:1]
-; GFX10PLUS-NEXT: s_mov_b32 s1, 0
-; GFX10PLUS-NEXT: ; return to shader part epilog
- %masked = call ptr addrspace(1) @llvm.ptrmask.p1.i16(ptr addrspace(1) %ptr, i16 %mask)
- ret ptr addrspace(1) %masked
-}
-
-define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i64(ptr addrspace(3) inreg %ptr, i64 inreg %mask) {
-; GCN-LABEL: s_ptrmask_local_variable_i64:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_and_b32 s0, s2, s3
-; GCN-NEXT: ; return to shader part epilog
-;
-; GFX10PLUS-LABEL: s_ptrmask_local_variable_i64:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_and_b32 s0, s2, s3
-; GFX10PLUS-NEXT: ; return to shader part epilog
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3) %ptr, i64 %mask)
- ret ptr addrspace(3) %masked
-}
-
define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3) inreg %ptr, i32 inreg %mask) {
; GCN-LABEL: s_ptrmask_local_variable_i32:
; GCN: ; %bb.0:
@@ -205,27 +65,10 @@ define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i32(ptr addrspace(3)
ret ptr addrspace(3) %masked
}
-define amdgpu_ps ptr addrspace(3) @s_ptrmask_local_variable_i16(ptr addrspace(3) inreg %ptr, i16 inreg %mask) {
-; GCN-LABEL: s_ptrmask_local_variable_i16:
-; GCN: ; %bb.0:
-; GCN-NEXT: s_and_b32 s0, 0xffff, s3
-; GCN-NEXT: s_and_b32 s0, s2, s0
-; GCN-NEXT: ; return to shader part epilog
-;
-; GFX10PLUS-LABEL: s_ptrmask_local_variable_i16:
-; GFX10PLUS: ; %bb.0:
-; GFX10PLUS-NEXT: s_and_b32 s0, 0xffff, s3
-; GFX10PLUS-NEXT: s_and_b32 s0, s2, s0
-; GFX10PLUS-NEXT: ; return to shader part epilog
- %masked = call ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3) %ptr, i16 %mask)
- ret ptr addrspace(3) %masked
-}
-
-declare ptr addrspace(3) @llvm.ptrmask.p3.i64(ptr addrspace(3), i64) #0
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
-declare ptr addrspace(3) @llvm.ptrmask.p3.i16(ptr addrspace(3), i16) #0
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
-declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32) #0
-declare ptr addrspace(1) @llvm.ptrmask.p1.i16(ptr addrspace(1), i16) #0
attributes #0 = { nounwind readnone speculatable willreturn }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10: {{.*}}
+; GFX11: {{.*}}
diff --git a/llvm/test/CodeGen/X86/lower-ptrmask.ll b/llvm/test/CodeGen/X86/lower-ptrmask.ll
index 185564e5a07ae5c..406241ecfff0271 100644
--- a/llvm/test/CodeGen/X86/lower-ptrmask.ll
+++ b/llvm/test/CodeGen/X86/lower-ptrmask.ll
@@ -14,22 +14,6 @@ define ptr @test1(ptr %src) {
ret ptr %ptr
}
-declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
-
-; CHECK-LABEL: name: test2
-; CHECK: %0:gr64 = COPY $rdi
-; CHECK-NEXT: %1:gr32 = COPY %0.sub_32bit
-; CHECK-NEXT: %2:gr32 = AND32ri %1, 10000, implicit-def dead $eflags
-; CHECK-NEXT: %3:gr64 = SUBREG_TO_REG 0, killed %2, %subreg.sub_32bit
-; CHECK-NEXT: $rax = COPY %3
-; CHECK-NEXT: RET 0, $rax
-
-
-define ptr @test2(ptr %src) {
- %ptr = call ptr @llvm.ptrmask.p0.i32(ptr %src, i32 10000)
- ret ptr %ptr
-}
-
declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>)
; CHECK-LABEL: name: test3
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index c9db0656c6b7dd4..280630468c2c09a 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -346,23 +346,7 @@ define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr,
ret i8 %load
}
-; This should not be folded, as the mask is implicitly zero extended,
-; so it would clear the high bits.
-define i8 @ptrmask_cast_local_to_flat_const_mask_32bit_neg4(ptr addrspace(3) %src.ptr) {
-; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_32bit_neg4(
-; CHECK-NEXT: [[CAST:%.*]] = addrspacecast ptr addrspace(3) [[SRC_PTR:%.*]] to ptr
-; CHECK-NEXT: [[MASKED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[CAST]], i32 -4)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[MASKED]], align 1
-; CHECK-NEXT: ret i8 [[LOAD]]
-;
- %cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
- %masked = call ptr @llvm.ptrmask.p0.i32(ptr %cast, i32 -4)
- %load = load i8, ptr %masked
- ret i8 %load
-}
-
declare ptr @llvm.ptrmask.p0.i64(ptr, i64) #0
-declare ptr @llvm.ptrmask.p0.i32(ptr, i32) #0
declare ptr addrspace(5) @llvm.ptrmask.p5.i32(ptr addrspace(5), i32) #0
declare ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3), i32) #0
declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64) #0
diff --git a/llvm/test/Transforms/InferAlignment/ptrmask.ll b/llvm/test/Transforms/InferAlignment/ptrmask.ll
index 52a8bcecba13dee..afab872d16d5eac 100644
--- a/llvm/test/Transforms/InferAlignment/ptrmask.ll
+++ b/llvm/test/Transforms/InferAlignment/ptrmask.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=infer-alignment -S | FileCheck %s
+target datalayout = "p1:64:64:64:32-p2:64:64:64:128"
+
; ------------------------------------------------------------------------------
; load instructions
; ------------------------------------------------------------------------------
@@ -74,4 +76,30 @@ define void @ptrmask_overaligned(ptr align 16 %ptr) {
ret void
}
+define i8 @smaller_index_type(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define i8 @smaller_index_type
+; CHECK-SAME: (ptr addrspace(1) [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR2:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[PTR]], i32 -4)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(1) [[PTR2]], align 4
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %ptr2 = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %ptr, i32 -4)
+ %load = load i8, ptr addrspace(1) %ptr2, align 1
+ ret i8 %load
+}
+
+define i8 @larger_index_type(ptr addrspace(2) %ptr) {
+; CHECK-LABEL: define i8 @larger_index_type
+; CHECK-SAME: (ptr addrspace(2) [[PTR:%.*]]) {
+; CHECK-NEXT: [[PTR2:%.*]] = call ptr addrspace(2) @llvm.ptrmask.p2.i128(ptr addrspace(2) [[PTR]], i128 -4)
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(2) [[PTR2]], align 4
+; CHECK-NEXT: ret i8 [[LOAD]]
+;
+ %ptr2 = call ptr addrspace(2) @llvm.ptrmask.p2.i128(ptr addrspace(2) %ptr, i128 -4)
+ %load = load i8, ptr addrspace(2) %ptr2, align 1
+ ret i8 %load
+}
+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
+declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32)
+declare ptr addrspace(2) @llvm.ptrmask.p2.i128(ptr addrspace(2), i128)
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index b944dfef6ec71cb..ec8e7c9348f1749 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -121,8 +121,6 @@ define void @test3(ptr sret(%struct.s) %a4) {
}
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
-declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
-declare ptr @llvm.ptrmask.p0.i128(ptr, i128)
define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align1(
@@ -207,29 +205,3 @@ define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
%load = load <16 x i8>, ptr %aligned, align 1
ret <16 x i8> %load
}
-
-; Increase load align from 1 to 8, and the mask type is smaller
-; than the pointer size.
-define <16 x i8> @ptrmask_align8_ptr_align1_smallmask(ptr align 1 %ptr) {
-; CHECK-LABEL: @ptrmask_align8_ptr_align1_smallmask(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -8)
-; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
-; CHECK-NEXT: ret <16 x i8> [[LOAD]]
-;
- %aligned = call ptr @llvm.ptrmask.p0.i32(ptr %ptr, i32 -8)
- %load = load <16 x i8>, ptr %aligned, align 1
- ret <16 x i8> %load
-}
-
-; Increase load align from 1 to 8, and the mask type is larger
-; than the pointer size.
-define <16 x i8> @ptrmask_align8_ptr_align1_bigmask(ptr align 1 %ptr) {
-; CHECK-LABEL: @ptrmask_align8_ptr_align1_bigmask(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i128(ptr [[PTR:%.*]], i128 -8)
-; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
-; CHECK-NEXT: ret <16 x i8> [[LOAD]]
-;
- %aligned = call ptr @llvm.ptrmask.p0.i128(ptr %ptr, i128 -8)
- %load = load <16 x i8>, ptr %aligned, align 1
- ret <16 x i8> %load
-}
diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
index d2da3be3201cfc0..bfe9efb1e88dd27 100644
--- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
@@ -1,8 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+target datalayout = "p1:64:64:64:32"
+
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)
-declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
+declare ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1), i32)
declare <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr>, <2 x i64>)
declare void @use.ptr(ptr)
@@ -18,18 +20,6 @@ define ptr @fold_2x(ptr %p, i64 %m0, i64 %m1) {
ret ptr %p1
}
-define ptr @fold_2x_i32(ptr %p, i32 %m0, i32 %m1) {
-; CHECK-LABEL: define ptr @fold_2x_i32
-; CHECK-SAME: (ptr [[P:%.*]], i32 [[M0:%.*]], i32 [[M1:%.*]]) {
-; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[M1]], [[M0]]
-; CHECK-NEXT: [[P1:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[P]], i32 [[TMP1]])
-; CHECK-NEXT: ret ptr [[P1]]
-;
- %p0 = call ptr @llvm.ptrmask.p0.i32(ptr %p, i32 %m0)
- %p1 = call ptr @llvm.ptrmask.p0.i32(ptr %p0, i32 %m1)
- ret ptr %p1
-}
-
define ptr @fold_2x_fail_multiuse(ptr %p, i64 %m0, i64 %m1) {
; CHECK-LABEL: define ptr @fold_2x_fail_multiuse
; CHECK-SAME: (ptr [[P:%.*]], i64 [[M0:%.*]], i64 [[M1:%.*]]) {
@@ -44,30 +34,6 @@ define ptr @fold_2x_fail_multiuse(ptr %p, i64 %m0, i64 %m1) {
ret ptr %p1
}
-define ptr @fold_2x_fail_type_mismatch(ptr %p, i32 %m0, i64 %m1) {
-; CHECK-LABEL: define ptr @fold_2x_fail_type_mismatch
-; CHECK-SAME: (ptr [[P:%.*]], i32 [[M0:%.*]], i64 [[M1:%.*]]) {
-; CHECK-NEXT: [[P0:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[P]], i32 [[M0]])
-; CHECK-NEXT: [[P1:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P0]], i64 [[M1]])
-; CHECK-NEXT: ret ptr [[P1]]
-;
- %p0 = call ptr @llvm.ptrmask.p0.i32(ptr %p, i32 %m0)
- %p1 = call ptr @llvm.ptrmask.p0.i64(ptr %p0, i64 %m1)
- ret ptr %p1
-}
-
-define ptr @fold_2x_fail_type_mismatch2(ptr %p, i64 %m0, i32 %m1) {
-; CHECK-LABEL: define ptr @fold_2x_fail_type_mismatch2
-; CHECK-SAME: (ptr [[P:%.*]], i64 [[M0:%.*]], i32 [[M1:%.*]]) {
-; CHECK-NEXT: [[P0:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[P]], i64 [[M0]])
-; CHECK-NEXT: [[P1:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[P0]], i32 [[M1]])
-; CHECK-NEXT: ret ptr [[P1]]
-;
- %p0 = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m0)
- %p1 = call ptr @llvm.ptrmask.p0.i32(ptr %p0, i32 %m1)
- ret ptr %p1
-}
-
define <2 x ptr> @fold_2x_vec(<2 x ptr> %p, <2 x i64> %m0, <2 x i64> %m1) {
; CHECK-LABEL: define <2 x ptr> @fold_2x_vec
; CHECK-SAME: (<2 x ptr> [[P:%.*]], <2 x i64> [[M0:%.*]], <2 x i64> [[M1:%.*]]) {
@@ -79,3 +45,15 @@ define <2 x ptr> @fold_2x_vec(<2 x ptr> %p, <2 x i64> %m0, <2 x i64> %m1) {
%p1 = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %p0, <2 x i64> %m1)
ret <2 x ptr> %p1
}
+
+define ptr addrspace(1) @fold_2x_smaller_index_type(ptr addrspace(1) %p, i32 %m0, i32 %m1) {
+; CHECK-LABEL: define ptr addrspace(1) @fold_2x_smaller_index_type
+; CHECK-SAME: (ptr addrspace(1) [[P:%.*]], i32 [[M0:%.*]], i32 [[M1:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[M1]], [[M0]]
+; CHECK-NEXT: [[P1:%.*]] = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) [[P]], i32 [[TMP1]])
+; CHECK-NEXT: ret ptr addrspace(1) [[P1]]
+;
+ %p0 = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p, i32 %m0)
+ %p1 = call ptr addrspace(1) @llvm.ptrmask.p1.i32(ptr addrspace(1) %p0, i32 %m1)
+ ret ptr addrspace(1) %p1
+}
diff --git a/llvm/test/Verifier/ptrmask.ll b/llvm/test/Verifier/ptrmask.ll
index c93d60b30a84804..a2e08c06e08c143 100644
--- a/llvm/test/Verifier/ptrmask.ll
+++ b/llvm/test/Verifier/ptrmask.ll
@@ -1,9 +1,13 @@
; RUN: not llvm-as < %s 2>&1 | FileCheck %s
+target datalayout = "p1:64:64:64:32"
+
declare float @llvm.ptrmask.f32.i64(float, i64)
declare ptr @llvm.ptrmask.p0.v4i64(ptr, <4 x i64>)
declare <2 x ptr> @llvm.ptrmask.v2p0.i64(<2 x ptr>, i64)
declare <2 x ptr> @llvm.ptrmask.v2p0.v4i64(<2 x ptr>, <4 x i64>)
+declare ptr @llvm.ptrmask.p0.i32(ptr, i32)
+declare ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1), i64)
; CHECK: llvm.ptrmask intrinsic first argument must be pointer or vector of pointers
; CHECK-NEXT: %1 = call float @llvm.ptrmask.f32.i64(float 0.000000e+00, i64 0)
@@ -32,3 +36,17 @@ define void @vector_size_mismatch() {
call <2 x ptr> @llvm.ptrmask.v2p0.v4i64(<2 x ptr> zeroinitializer, <4 x i64> zeroinitializer)
ret void
}
+
+; CHECK: llvm.ptrmask intrinsic second argument bitwidth must match pointer index type size of first argument
+; CHECK: %1 = call ptr @llvm.ptrmask.p0.i32(ptr null, i32 0)
+define void @wrong_size_1() {
+ call ptr @llvm.ptrmask.p0.i32(ptr null, i32 0)
+ ret void
+}
+
+; CHECK: llvm.ptrmask intrinsic second argument bitwidth must match pointer index type size of first argument
+; CHECK: %1 = call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) null, i64 0)
+define void @wrong_size_2() {
+ call ptr addrspace(1) @llvm.ptrmask.p1.i64(ptr addrspace(1) null, i64 0)
+ ret void
+}
>From 446be93ed7384375075e93a5cf4eff2e55a2883d Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov at redhat.com>
Date: Thu, 19 Oct 2023 09:43:15 +0200
Subject: [PATCH 2/2] Drop leftover sentence, clarify no capturing
---
llvm/docs/LangRef.rst | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 53dc00a41e01e96..bdb4f267bdedb68 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -26980,9 +26980,9 @@ implies that the mask is extended with 1 bits to the pointer type size.
Both the returned pointer(s) and the first argument are based on the same
underlying object (for more information on the *based on* terminology see
-:ref:`the pointer aliasing rules <pointeraliasing>`). If the bitwidth of the
-mask argument does not match the pointer size of the target, the mask is
-zero-extended or truncated accordingly.
+:ref:`the pointer aliasing rules <pointeraliasing>`).
+
+The intrinsic only captures the pointer argument through the return value.
.. _int_threadlocal_address:
More information about the llvm-commits
mailing list