[clang] Add combines/simplifications for `llvm.ptrmask` (PR #67166)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 22 10:07:32 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-llvm-analysis
<details>
<summary>Changes</summary>
- [InstSimplify] Add tests for simplifying `llvm.ptrmask`; NFC
- [InstSimplify] Add some basic simplifications for `llvm.ptrmask`
- [InstCombine] Add tests for combining `llvm.ptrmask`; NFC
- [InstCombine] Make combines on `llvm.ptrmask` fail loudly if we have vector types; NFC
- [InstCombine] Deduce `align` and `nonnull` return attributes for `llvm.ptrmask`
- [InstCombine] Implement `SimplifyDemandedBits` for `llvm.ptrmask`
- [InstCombine] Preserve return attributes when merging `llvm.ptrmask`
- [InstCombine] Merge consecutive `llvm.ptrmask` with different mask types if a mask is constant.
- [InstCombine] Fold `(ptrtoint (ptrmask p0, m0))` -> `(and (ptrtoint p0), m0)` (see the IR sketch below)
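
To make the changes easier to skim, here is a minimal, hand-written LLVM IR sketch (not taken from the patch; the function names are made up) of two of the folds listed above: collapsing consecutive `llvm.ptrmask` calls with constant masks, and turning `ptrtoint` of a `ptrmask` into an integer `and`:

```llvm
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

; The two consecutive masks should collapse into a single ptrmask whose
; mask is the AND of the two constants (-16 & -4 == -16).
define ptr @merge_constant_masks(ptr %p) {
  %q = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  %r = call ptr @llvm.ptrmask.p0.i64(ptr %q, i64 -4)
  ret ptr %r
}

; (ptrtoint (ptrmask p, m)) is expected to become (and (ptrtoint p), m)
; when the mask type matches the pointer's index type and the ptrmask
; has a single use.
define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) {
  %q = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 %m)
  %i = ptrtoint ptr %q to i64
  ret i64 %i
}
```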
---
Patch is 31.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/67166.diff
11 Files Affected:
- (modified) clang/test/CodeGen/arm64_32-vaarg.c (+1-1)
- (modified) llvm/lib/Analysis/InstructionSimplify.cpp (+42)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+69-9)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp (+9)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+1)
- (modified) llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp (+63-5)
- (modified) llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll (+2-3)
- (modified) llvm/test/Transforms/InstCombine/align-addr.ll (+8-10)
- (modified) llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll (+37)
- (added) llvm/test/Transforms/InstCombine/ptrmask.ll (+154)
- (added) llvm/test/Transforms/InstSimplify/ptrmask.ll (+135)
``````````diff
diff --git a/clang/test/CodeGen/arm64_32-vaarg.c b/clang/test/CodeGen/arm64_32-vaarg.c
index 9fbcf88ecfdcc33..3f1f4443436da15 100644
--- a/clang/test/CodeGen/arm64_32-vaarg.c
+++ b/clang/test/CodeGen/arm64_32-vaarg.c
@@ -29,7 +29,7 @@ long long test_longlong(OneLongLong input, va_list *mylist) {
// CHECK-LABEL: define{{.*}} i64 @test_longlong(i64 %input
// CHECK: [[STARTPTR:%.*]] = load ptr, ptr %mylist
// CHECK: [[ALIGN_TMP:%.+]] = getelementptr inbounds i8, ptr [[STARTPTR]], i32 7
- // CHECK: [[ALIGNED_ADDR:%.+]] = tail call ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
+ // CHECK: [[ALIGNED_ADDR:%.+]] = tail call align 8 ptr @llvm.ptrmask.p0.i32(ptr nonnull [[ALIGN_TMP]], i32 -8)
// CHECK: [[NEXT:%.*]] = getelementptr inbounds i8, ptr [[ALIGNED_ADDR]], i32 8
// CHECK: store ptr [[NEXT]], ptr %mylist
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
index d8aa614cae53b10..1049845d97844a7 100644
--- a/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -6397,6 +6397,48 @@ static Value *simplifyBinaryIntrinsic(Function *F, Value *Op0, Value *Op1,
return Constant::getNullValue(ReturnType);
break;
}
+ case Intrinsic::ptrmask: {
+ // Fail loudly in case this is ever changed.
+ // TODO: If vector types are supported, the logic that checks whether the
+ // mask is useless should be updated to use generic constants.
+ assert(!Op0->getType()->isVectorTy() && !Op1->getType()->isVectorTy() &&
+ "These simplifications where written at a time when ptrmask did not "
+ "support vector types and may not work for vectors");
+
+ // NOTE: We can't apply these simplifications based on the value of Op1
+ // because we need to preserve provenance.
+ if (isa<PoisonValue>(Op0))
+ return Op0;
+
+ if (Q.isUndefValue(Op0))
+ return Constant::getNullValue(Op0->getType());
+
+ if (match(Op0, m_Zero()))
+ return Constant::getNullValue(Op0->getType());
+
+ if (Op1->getType()->getScalarSizeInBits() ==
+ Q.DL.getPointerTypeSizeInBits(Op0->getType())) {
+ if (match(Op1, m_PtrToInt(m_Specific(Op0))))
+ return Op0;
+
+ // TODO: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
+ if (match(Op1, m_AllOnes()))
+ return Op0;
+
+ const APInt *C;
+ if (match(Op1, m_APInt(C))) {
+ KnownBits PtrKnown =
+ computeKnownBits(Op0, Q.DL, /*Depth*/ 0, Q.AC, Q.CxtI, Q.DT);
+ // See if we are only masking off bits we know are already zero due to
+ // alignment.
+ if ((*C | PtrKnown.Zero).isAllOnes())
+ return Op0;
+ }
+ }
+ break;
+ }
case Intrinsic::smax:
case Intrinsic::smin:
case Intrinsic::umax:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index c6100f24b0507de..e0a8b30b34a8ff0 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1964,18 +1964,78 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::ptrmask: {
+ KnownBits Known(DL.getPointerTypeSizeInBits(II->getType()));
+ if (SimplifyDemandedInstructionBits(*II, Known))
+ return II;
+
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ // Fail loudly in case this is ever changed.
+ // TODO: If vector types are supported, the merging of (ptrmask (ptrmask))
+ // needs to ensure we don't merge a vector type with a non-vector type.
+ assert(!Op0->getType()->isVectorTy() && !Op1->getType()->isVectorTy() &&
+ "These combines where written at a time when ptrmask did not "
+ "support vector types and may not work for vectors");
+
Value *InnerPtr, *InnerMask;
- if (match(II->getArgOperand(0),
- m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(InnerPtr),
- m_Value(InnerMask))))) {
- if (II->getArgOperand(1)->getType() == InnerMask->getType()) {
- Value *NewMask = Builder.CreateAnd(II->getArgOperand(1), InnerMask);
- return replaceInstUsesWith(
- *II,
- Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
- {InnerPtr, NewMask}));
+ bool Changed = false;
+ // Combine:
+ // (ptrmask (ptrmask p, A), B)
+ // -> (ptrmask p, (and A, B))
+ if (match(Op0, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(
+ m_Value(InnerPtr), m_Value(InnerMask))))) {
+ // See if combining the two masks is free.
+ bool OkayToMerge = InnerMask->getType() == Op1->getType();
+ bool NeedsNew = false;
+ if (!OkayToMerge) {
+ if (match(InnerMask, m_ImmConstant())) {
+ InnerMask = Builder.CreateZExtOrTrunc(InnerMask, Op1->getType());
+ OkayToMerge = true;
+ } else if (match(Op1, m_ImmConstant())) {
+ Op1 = Builder.CreateZExtOrTrunc(Op1, InnerMask->getType());
+ OkayToMerge = true;
+ // Need to create a new one here, as the intrinsic id needs to change.
+ NeedsNew = true;
+ }
+ }
+ if (InnerMask->getType() == Op1->getType()) {
+ // TODO: If InnerMask == Op1, we could copy attributes from inner
+ // callsite -> outer callsite.
+ Value *NewMask = Builder.CreateAnd(Op1, InnerMask);
+ if (NeedsNew)
+ return replaceInstUsesWith(
+ *II,
+ Builder.CreateIntrinsic(InnerPtr->getType(), Intrinsic::ptrmask,
+ {InnerPtr, NewMask}));
+
+ replaceOperand(CI, 0, InnerPtr);
+ replaceOperand(CI, 1, NewMask);
+ Changed = true;
}
}
+
+ // See if we can deduce non-null.
+ if (!CI.hasRetAttr(Attribute::NonNull) &&
+ (Known.isNonZero() ||
+ isKnownNonZero(II, DL, /*Depth*/ 0, &AC, II, &DT))) {
+ CI.addRetAttr(Attribute::NonNull);
+ Changed = true;
+ }
+
+ // Known bits will capture any alignment information associated with
+ // the pointer argument.
+ if (Known.countMinTrailingZeros() > Log2(CI.getRetAlign().valueOrOne())) {
+ if (CI.hasRetAttr(Attribute::Alignment))
+ CI.removeRetAttr(Attribute::Alignment);
+ CI.addRetAttr(Attribute::get(
+ CI.getContext(), Attribute::Alignment,
+ uint64_t(1) << (Known.isZero() ? (Known.getBitWidth() - 1)
+ : Known.countMinTrailingZeros())));
+
+ Changed = true;
+ }
+ if (Changed)
+ return &CI;
break;
}
case Intrinsic::uadd_with_overflow:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 8ff61ab36da307b..508609670b9ac88 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -1955,6 +1955,15 @@ Instruction *InstCombinerImpl::visitPtrToInt(PtrToIntInst &CI) {
return CastInst::CreateIntegerCast(P, Ty, /*isSigned=*/false);
}
+ // (ptrtoint (ptrmask P, M))
+ // -> (and (ptrtoint P), M)
+ // This is generally beneficial as `and` is better supported than `ptrmask`.
+ Value *Ptr, *Mask;
+ if (match(SrcOp, m_OneUse(m_Intrinsic<Intrinsic::ptrmask>(m_Value(Ptr),
+ m_Value(Mask)))) &&
+ Mask->getType() == Ty)
+ return BinaryOperator::CreateAnd(Builder.CreatePtrToInt(Ptr, Ty), Mask);
+
if (auto *GEP = dyn_cast<GetElementPtrInst>(SrcOp)) {
// Fold ptrtoint(gep null, x) to multiply + constant if the GEP has one use.
// While this can increase the number of instructions it doesn't actually
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 40c24d87bfec508..0b03aa48fa0c0cc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -526,6 +526,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
/// Tries to simplify operands to an integer instruction based on its
/// demanded bits.
bool SimplifyDemandedInstructionBits(Instruction &Inst);
+ bool SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known);
Value *SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
APInt &UndefElts, unsigned Depth = 0,
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index be005e61a8d2d89..5a5334b1347a95d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -48,15 +48,19 @@ static bool ShrinkDemandedConstant(Instruction *I, unsigned OpNo,
return true;
}
+/// Returns the bitwidth of the given scalar or pointer type. For vector types,
+/// returns the element type's bitwidth.
+static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
+ if (unsigned BitWidth = Ty->getScalarSizeInBits())
+ return BitWidth;
+ return DL.getPointerTypeSizeInBits(Ty);
+}
/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
/// the instruction has any properties that allow us to simplify its operands.
-bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
- unsigned BitWidth = Inst.getType()->getScalarSizeInBits();
- KnownBits Known(BitWidth);
- APInt DemandedMask(APInt::getAllOnes(BitWidth));
-
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst, KnownBits &Known) {
+ APInt DemandedMask(APInt::getAllOnes(Known.getBitWidth()));
Value *V = SimplifyDemandedUseBits(&Inst, DemandedMask, Known,
0, &Inst);
if (!V) return false;
@@ -65,6 +69,13 @@ bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
return true;
}
+/// Inst is an integer instruction that SimplifyDemandedBits knows about. See if
+/// the instruction has any properties that allow us to simplify its operands.
+bool InstCombinerImpl::SimplifyDemandedInstructionBits(Instruction &Inst) {
+ KnownBits Known(getBitWidth(Inst.getType(), DL));
+ return SimplifyDemandedInstructionBits(Inst, Known);
+}
+
/// This form of SimplifyDemandedBits simplifies the specified instruction
/// operand if possible, updating it in place. It returns true if it made any
/// change and false otherwise.
@@ -898,6 +909,53 @@ Value *InstCombinerImpl::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
}
break;
}
+ case Intrinsic::ptrmask: {
+ // Fail loudly in case this is ever changed.
+ // Likely not much needs to be changed here to support vector types.
+ assert(!I->getOperand(0)->getType()->isVectorTy() &&
+ !I->getOperand(1)->getType()->isVectorTy() &&
+ "These simplifications where written at a time when ptrmask did "
+ "not support vector types and may not work for vectors");
+
+ unsigned MaskWidth = I->getOperand(1)->getType()->getScalarSizeInBits();
+ RHSKnown = KnownBits(MaskWidth);
+ // If either the LHS or the RHS are Zero, the result is zero.
+ if (SimplifyDemandedBits(I, 0, DemandedMask, LHSKnown, Depth + 1) ||
+ SimplifyDemandedBits(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth),
+ RHSKnown, Depth + 1))
+ return I;
+
+ RHSKnown = RHSKnown.zextOrTrunc(BitWidth);
+ assert(!RHSKnown.hasConflict() && "Bits known to be one AND zero?");
+ assert(!LHSKnown.hasConflict() && "Bits known to be one AND zero?");
+
+ Known = LHSKnown & RHSKnown;
+ KnownBitsComputed = DemandedMask.isAllOnes();
+
+ // If the client is only demanding bits we know to be zero, return
+ // `llvm.ptrmask(p, 0)`. We can't return `null` here due to pointer
+ // provenance, but making the mask zero will be easily optimizable in
+ // the backend.
+ if (DemandedMask.isSubsetOf(Known.Zero))
+ return replaceOperand(
+ *I, 1, Constant::getNullValue(I->getOperand(1)->getType()));
+
+ // Mask in demanded space does nothing.
+ // TODO: We may have attributes associated with the return value of the
+ // llvm.ptrmask intrinsic that will be lost when we just return the
+ // operand. We should try to preserve them.
+ if (DemandedMask.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return I->getOperand(0);
+
+ // If the RHS is a constant, see if we can simplify it.
+ if (ShrinkDemandedConstant(
+ I, 1, (DemandedMask & ~LHSKnown.Zero).zextOrTrunc(MaskWidth)))
+ return I;
+
+ break;
+ }
+
case Intrinsic::fshr:
case Intrinsic::fshl: {
const APInt *SA;
diff --git a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
index c9db0656c6b7dd4..561a5ff35ba1082 100644
--- a/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
+++ b/llvm/test/Transforms/InferAddressSpaces/AMDGPU/ptrmask.ll
@@ -320,8 +320,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_fffffffffffffffe(ptr addrspace(
define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(3) %src.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(
-; CHECK-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 -1)
-; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP1]], align 1
+; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[SRC_PTR:%.*]], align 1
; CHECK-NEXT: ret i8 [[LOAD]]
;
%cast = addrspacecast ptr addrspace(3) %src.ptr to ptr
@@ -333,7 +332,7 @@ define i8 @ptrmask_cast_local_to_flat_const_mask_ffffffffffffffff(ptr addrspace(
; Make sure non-constant masks can also be handled.
define i8 @ptrmask_cast_local_to_flat_load_range_mask(ptr addrspace(3) %src.ptr, ptr addrspace(1) %mask.ptr) {
; CHECK-LABEL: @ptrmask_cast_local_to_flat_load_range_mask(
-; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range !0
+; CHECK-NEXT: [[LOAD_MASK:%.*]] = load i64, ptr addrspace(1) [[MASK_PTR:%.*]], align 8, !range [[RNG0:![0-9]+]]
; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 [[LOAD_MASK]] to i32
; CHECK-NEXT: [[TMP2:%.*]] = call ptr addrspace(3) @llvm.ptrmask.p3.i32(ptr addrspace(3) [[SRC_PTR:%.*]], i32 [[TMP1]])
; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr addrspace(3) [[TMP2]], align 1
diff --git a/llvm/test/Transforms/InstCombine/align-addr.ll b/llvm/test/Transforms/InstCombine/align-addr.ll
index 23f620310d7c26c..f3b33f013a9b5d4 100644
--- a/llvm/test/Transforms/InstCombine/align-addr.ll
+++ b/llvm/test/Transforms/InstCombine/align-addr.ll
@@ -134,7 +134,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask)
define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -146,7 +146,7 @@ define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask)
; Increase load align from 1 to 2
define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align2_ptr_align1(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -158,7 +158,7 @@ define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
; Increase load align from 1 to 4
define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align4_ptr_align1(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -170,7 +170,7 @@ define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
; Increase load align from 1 to 8
define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -183,8 +183,7 @@ define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
; TODO: Should be able to drop the ptrmask
define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align8(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
-; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
%aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
@@ -196,8 +195,7 @@ define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
; TODO: Should be able to drop the ptrmask
define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align16(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
-; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
+; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
%aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
@@ -209,7 +207,7 @@ define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
; than the pointer size.
define <16 x i8> @ptrmask_align8_ptr_align1_smallmask(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1_smallmask(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -8)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i32(ptr [[PTR:%.*]], i32 -8)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
@@ -222,7 +220,7 @@ define <16 x i8> @ptrmask_align8_ptr_align1_smallmask(ptr align 1 %ptr) {
; than the pointer size.
define <16 x i8> @ptrmask_align8_ptr_align1_bigmask(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1_bigmask(
-; CHECK-NEXT: [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i128(ptr [[PTR:%.*]], i128 -8)
+; CHECK-NEXT: [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i128(ptr [[PTR:%.*]], i128 18446744073709551608)
; CHECK-NEXT: [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT: ret <16 x i8> [[LOAD]]
;
diff --git a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
index 904c758b99306f4..0723fe2fd4239c4 100644
--- a/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
+++ b/llvm/test/Transforms/InstCombine/consecutive-ptrmask.ll
@@ -65,3 +65,40 @@ define ptr @fold_2x_fail_type_mismatch2(ptr %p, i64 %m0, i32 %m1) {
%p1 = call ptr @llvm.ptrmask.p0.i32(ptr %p0, i32 %m1)
ret ptr %p1
}
+
+
+define ptr @fold_2x_type_mismatch_const0(ptr %p, i32 %m1) {
+; CHECK-LABEL: define ptr @fold_2x_type_mismatch_const0
+; CHECK-SAME: (ptr [[P:%.*]], i32 [[M1:%.*]]) {
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[M1]], -128
+; CHECK-NEXT: ...
[truncated]
``````````
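
As a rough, hand-written expectation (not captured tool output), the IR sketch shown earlier should come out of the combines in this patch roughly as follows; the `align 16` return attribute follows from the align-deduction change listed in the summary:

```llvm
declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

define ptr @merge_constant_masks(ptr %p) {
  ; -16 & -4 folds to -16 and the two calls collapse into one; the
  ; deduced alignment is attached to the return value.
  %r = call align 16 ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -16)
  ret ptr %r
}

define i64 @ptrtoint_of_ptrmask(ptr %p, i64 %m) {
  ; The ptrmask is rewritten as an integer AND of the raw pointer value.
  %pi = ptrtoint ptr %p to i64
  %i = and i64 %pi, %m
  ret i64 %i
}
```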
</details>
https://github.com/llvm/llvm-project/pull/67166