[llvm] [Attributor] Propagate alignment through ptrmask (PR #150158)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 5 20:59:34 PDT 2025
https://github.com/Shoreshen updated https://github.com/llvm/llvm-project/pull/150158
>From bd9c95b1e2c35e5655f4c0814c58c7e5ecc21c68 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Wed, 23 Jul 2025 10:57:39 +0800
Subject: [PATCH 01/10] propagate alignment through ptrmask instruction
---
.../Transforms/IPO/AttributorAttributes.cpp | 62 +++++++++++
.../Transforms/Attributor/align-ptrmask.ll | 101 ++++++++++++++++++
2 files changed, 163 insertions(+)
create mode 100644 llvm/test/Transforms/Attributor/align-ptrmask.ll
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3c24d2eca647d..67aed0a327f8e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5203,6 +5203,32 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::ptrmask) {
+ auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
+ QueryingAA, IRPosition::value(*(II->getOperand(1))),
+ DepClassTy::NONE);
+ const AAAlign *AlignAA = A.getAAFor<AAAlign>(
+ QueryingAA, IRPosition::value(*(II)), DepClassTy::NONE);
+ if (ConstVals && ConstVals->isValidState()) {
+ if (ConstVals->isAtFixpoint()) {
+ uint64_t TrailingZeros = 64;
+ for (const auto &It : ConstVals->getAssumedSet())
+ if (It.countTrailingZeros() < TrailingZeros)
+ TrailingZeros = It.countTrailingZeros();
+ if (TrailingZeros < 64) {
+ uint64_t Mask = 1 << TrailingZeros;
+ if (Mask >= AlignAA->getKnownAlign().value()) {
+ return 0;
+ }
+ }
+ return AlignAA->getKnownAlign().value();
+ }
+ } else if (AlignAA) {
+ return AlignAA->getKnownAlign().value();
+ }
+ }
+ }
MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
@@ -5502,6 +5528,42 @@ struct AAAlignCallSiteReturned final
AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
: Base(IRP, A) {}
+ ChangeStatus updateImpl(Attributor &A) override {
+ Instruction *I = getIRPosition().getCtxI();
+ if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (II->getIntrinsicID() == Intrinsic::ptrmask) {
+ const AAPotentialConstantValues *ConstVals =
+ A.getAAFor<AAPotentialConstantValues>(
+ *this, IRPosition::value(*(II->getOperand(1))),
+ DepClassTy::REQUIRED);
+ const AAAlign *AlignAA =
+ A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
+ DepClassTy::REQUIRED);
+ uint64_t Alignment = 0;
+ if (ConstVals && ConstVals->isValidState()) {
+ unsigned TrailingZeros = 64;
+ for (const auto &It : ConstVals->getAssumedSet())
+ if (It.countTrailingZeros() < TrailingZeros)
+ TrailingZeros = It.countTrailingZeros();
+ if (TrailingZeros < 64)
+ Alignment = 1 << TrailingZeros;
+ }
+ if (AlignAA && AlignAA->isValidState() &&
+ Alignment < AlignAA->getAssumedAlign().value())
+ Alignment = AlignAA->getAssumedAlign().value();
+
+ if (Alignment != 0) {
+ uint64_t OldAssumed = getAssumed();
+ takeAssumedMinimum(Alignment);
+ return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
+ } else {
+ return ChangeStatus::UNCHANGED;
+ }
+ }
+ }
+ return Base::updateImpl(A);
+ };
/// See AbstractAttribute::trackStatistics()
void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); }
};
diff --git a/llvm/test/Transforms/Attributor/align-ptrmask.ll b/llvm/test/Transforms/Attributor/align-ptrmask.ll
new file mode 100644
index 0000000000000..710f1c3983b7b
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/align-ptrmask.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=attributor -S < %s | FileCheck %s
+
+define float @align_ptrmask_back_no_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define float @align_ptrmask_back_no_prop(
+; CHECK-SAME: ptr nofree readonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2:[0-9]+]]
+; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %sel = select i1 %cmp1, i64 -32, i64 -8
+ %sel1 = select i1 %cmp2, i64 %sel, i64 -16
+ %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
+ %res = load float, ptr %p, align 8
+ ret float %res
+}
+
+define float @align_ptrmask_back_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define float @align_ptrmask_back_prop(
+; CHECK-SAME: ptr nofree readonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
+; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2]]
+; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 16, !invariant.load [[META0]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %sel = select i1 %cmp1, i64 -32, i64 -8
+ %sel1 = select i1 %cmp2, i64 %sel, i64 -16
+ %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
+ %res = load float, ptr %p, align 16
+ ret float %res
+}
+
+define float @align_ptrmask_forward_mask(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define float @align_ptrmask_forward_mask(
+; CHECK-SAME: ptr nofree readonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2]]
+; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %sel = select i1 %cmp1, i64 -32, i64 -8
+ %sel1 = select i1 %cmp2, i64 %sel, i64 -16
+ %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
+ %res = load float, ptr %p, align 4
+ ret float %res
+}
+
+define float @align_ptrmask_forward_ptr(ptr align 16 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define float @align_ptrmask_forward_ptr(
+; CHECK-SAME: ptr nofree readonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
+; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2]]
+; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 16, !invariant.load [[META0]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %sel = select i1 %cmp1, i64 -32, i64 -8
+ %sel1 = select i1 %cmp2, i64 %sel, i64 -16
+ %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
+ %res = load float, ptr %p, align 4
+ ret float %res
+}
+
+define float @align_ptrmask_forward_nonconst_mask(ptr align 8 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define float @align_ptrmask_forward_nonconst_mask(
+; CHECK-SAME: ptr nofree readonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]]
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR2]]
+; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %sel = select i1 %cmp1, i64 -32, i64 %y
+ %sel1 = select i1 %cmp2, i64 %sel, i64 -16
+ %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
+ %res = load float, ptr %p, align 4
+ ret float %res
+}
+
+define float @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define float @align_ptrmask_back_nonconst_mask(
+; CHECK-SAME: ptr nofree readonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]]
+; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR2]]
+; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0]]
+; CHECK-NEXT: ret float [[RES]]
+;
+ %sel = select i1 %cmp1, i64 -32, i64 %y
+ %sel1 = select i1 %cmp2, i64 %sel, i64 -16
+ %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
+ %res = load float, ptr %p, align 8
+ ret float %res
+}
+;.
+; CHECK: [[META0]] = !{}
+;.
>From 48cd7d645d59f988d27f1a538aa52c3ecb47f8f1 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 28 Jul 2025 13:35:17 +0800
Subject: [PATCH 02/10] fix matthews comments
---
.../Transforms/IPO/AttributorAttributes.cpp | 140 +++++++++-------
.../Transforms/Attributor/align-ptrmask.ll | 149 +++++++++++-------
llvm/test/lit.cfg.py | 2 +-
3 files changed, 180 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 67aed0a327f8e..e1b634eb328a7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5188,6 +5188,82 @@ struct AADereferenceableCallSiteReturned final
// ------------------------ Align Argument Attribute ------------------------
namespace {
+
+static std::optional<uint64_t>
+getKnownAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
+ const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ptrmask: {
+ auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
+ QueryingAA, IRPosition::value(*(II->getOperand(1))), DepClassTy::NONE);
+ const AAAlign *AlignAA = A.getAAFor<AAAlign>(
+ QueryingAA, IRPosition::value(*(II)), DepClassTy::NONE);
+ if (ConstVals && ConstVals->isValidState()) {
+ if (ConstVals->isAtFixpoint()) {
+ const DataLayout &DL = A.getDataLayout();
+ unsigned Size =
+ DL.getPointerTypeSizeInBits(II->getOperand(0)->getType());
+ uint64_t TrailingZeros = Size;
+ for (const auto &It : ConstVals->getAssumedSet())
+ if (It.countTrailingZeros() < TrailingZeros)
+ TrailingZeros = It.countTrailingZeros();
+ if (TrailingZeros < Size) {
+ uint64_t Mask = 1 << TrailingZeros;
+ if (Mask >= AlignAA->getKnownAlign().value()) {
+ return 0;
+ }
+ }
+ return AlignAA->getKnownAlign().value();
+ }
+ } else if (AlignAA) {
+ return AlignAA->getKnownAlign().value();
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return std::nullopt;
+}
+
+static std::optional<uint64_t>
+getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
+ const IntrinsicInst *II) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::ptrmask: {
+ const AAPotentialConstantValues *ConstVals =
+ A.getAAFor<AAPotentialConstantValues>(
+ QueryingAA, IRPosition::value(*(II->getOperand(1))),
+ DepClassTy::REQUIRED);
+ const AAAlign *AlignAA =
+ A.getAAFor<AAAlign>(QueryingAA, IRPosition::value(*(II->getOperand(0))),
+ DepClassTy::REQUIRED);
+ uint64_t Alignment = 0;
+ if (ConstVals && ConstVals->isValidState()) {
+ const DataLayout &DL = A.getDataLayout();
+ unsigned Size = DL.getPointerTypeSizeInBits(II->getOperand(0)->getType());
+ unsigned TrailingZeros = Size;
+ for (const auto &It : ConstVals->getAssumedSet())
+ if (It.countTrailingZeros() < TrailingZeros)
+ TrailingZeros = It.countTrailingZeros();
+ if (TrailingZeros < Size)
+ Alignment = 1 << TrailingZeros;
+ }
+ if (AlignAA && AlignAA->isValidState() &&
+ Alignment < AlignAA->getAssumedAlign().value())
+ Alignment = AlignAA->getAssumedAlign().value();
+
+ if (Alignment != 0) {
+ return Alignment;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return std::nullopt;
+}
+
static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
Value &AssociatedValue, const Use *U,
const Instruction *I, bool &TrackUse) {
@@ -5204,30 +5280,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
return 0;
}
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::ptrmask) {
- auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
- QueryingAA, IRPosition::value(*(II->getOperand(1))),
- DepClassTy::NONE);
- const AAAlign *AlignAA = A.getAAFor<AAAlign>(
- QueryingAA, IRPosition::value(*(II)), DepClassTy::NONE);
- if (ConstVals && ConstVals->isValidState()) {
- if (ConstVals->isAtFixpoint()) {
- uint64_t TrailingZeros = 64;
- for (const auto &It : ConstVals->getAssumedSet())
- if (It.countTrailingZeros() < TrailingZeros)
- TrailingZeros = It.countTrailingZeros();
- if (TrailingZeros < 64) {
- uint64_t Mask = 1 << TrailingZeros;
- if (Mask >= AlignAA->getKnownAlign().value()) {
- return 0;
- }
- }
- return AlignAA->getKnownAlign().value();
- }
- } else if (AlignAA) {
- return AlignAA->getKnownAlign().value();
- }
- }
+ std::optional<uint64_t> Align =
+ getKnownAlignForIntrinsic(A, QueryingAA, II);
+ if (Align.has_value())
+ return Align.value();
}
MaybeAlign MA;
@@ -5531,36 +5587,14 @@ struct AAAlignCallSiteReturned final
ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = getIRPosition().getCtxI();
if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- if (II->getIntrinsicID() == Intrinsic::ptrmask) {
- const AAPotentialConstantValues *ConstVals =
- A.getAAFor<AAPotentialConstantValues>(
- *this, IRPosition::value(*(II->getOperand(1))),
- DepClassTy::REQUIRED);
- const AAAlign *AlignAA =
- A.getAAFor<AAAlign>(*this, IRPosition::value(*(II->getOperand(0))),
- DepClassTy::REQUIRED);
- uint64_t Alignment = 0;
- if (ConstVals && ConstVals->isValidState()) {
- unsigned TrailingZeros = 64;
- for (const auto &It : ConstVals->getAssumedSet())
- if (It.countTrailingZeros() < TrailingZeros)
- TrailingZeros = It.countTrailingZeros();
- if (TrailingZeros < 64)
- Alignment = 1 << TrailingZeros;
- }
- if (AlignAA && AlignAA->isValidState() &&
- Alignment < AlignAA->getAssumedAlign().value())
- Alignment = AlignAA->getAssumedAlign().value();
-
- if (Alignment != 0) {
- uint64_t OldAssumed = getAssumed();
- takeAssumedMinimum(Alignment);
- return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- } else {
- return ChangeStatus::UNCHANGED;
- }
+ std::optional<uint64_t> Align = getAssumedAlignForIntrinsic(A, *this, II);
+ if (Align.has_value()) {
+ uint64_t OldAssumed = getAssumed();
+ takeAssumedMinimum(Align.value());
+ return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
}
+ return ChangeStatus::UNCHANGED;
}
return Base::updateImpl(A);
};
diff --git a/llvm/test/Transforms/Attributor/align-ptrmask.ll b/llvm/test/Transforms/Attributor/align-ptrmask.ll
index 710f1c3983b7b..117e551f6ea47 100644
--- a/llvm/test/Transforms/Attributor/align-ptrmask.ll
+++ b/llvm/test/Transforms/Attributor/align-ptrmask.ll
@@ -1,101 +1,136 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -passes=attributor -S < %s | FileCheck %s
-define float @align_ptrmask_back_no_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
-; CHECK-LABEL: define float @align_ptrmask_back_no_prop(
-; CHECK-SAME: ptr nofree readonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0:[0-9]+]] {
+define ptr @align_ptrmask_back_no_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_no_prop(
+; CHECK-SAME: ptr nofree writeonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2:[0-9]+]]
-; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0:![0-9]+]]
-; CHECK-NEXT: ret float [[RES]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
+; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 -8
%sel1 = select i1 %cmp2, i64 %sel, i64 -16
- %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
- %res = load float, ptr %p, align 8
- ret float %res
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1)
+ store float 1.0, ptr %p, align 8
+ ret ptr %p
}
-define float @align_ptrmask_back_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
-; CHECK-LABEL: define float @align_ptrmask_back_prop(
-; CHECK-SAME: ptr nofree readonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+define ptr @align_ptrmask_back_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define noundef nonnull align 16 dereferenceable(4) ptr @align_ptrmask_back_prop(
+; CHECK-SAME: ptr nofree writeonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2]]
-; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 16, !invariant.load [[META0]]
-; CHECK-NEXT: ret float [[RES]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 16 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 16
+; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 -8
%sel1 = select i1 %cmp2, i64 %sel, i64 -16
- %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
- %res = load float, ptr %p, align 16
- ret float %res
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1)
+ store float 1.0, ptr %p, align 16
+ ret ptr %p
}
-define float @align_ptrmask_forward_mask(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
-; CHECK-LABEL: define float @align_ptrmask_forward_mask(
-; CHECK-SAME: ptr nofree readonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+define ptr @align_ptrmask_forward_mask(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define align 8 ptr @align_ptrmask_forward_mask(
+; CHECK-SAME: ptr nofree readnone align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2]]
-; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0]]
-; CHECK-NEXT: ret float [[RES]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 -8
%sel1 = select i1 %cmp2, i64 %sel, i64 -16
- %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
- %res = load float, ptr %p, align 4
- ret float %res
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1)
+ ret ptr %p
}
-define float @align_ptrmask_forward_ptr(ptr align 16 %x, i1 %cmp1, i1 %cmp2) {
-; CHECK-LABEL: define float @align_ptrmask_forward_ptr(
-; CHECK-SAME: ptr nofree readonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+define ptr @align_ptrmask_forward_ptr(ptr align 16 %x, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define align 16 ptr @align_ptrmask_forward_ptr(
+; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR2]]
-; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 16, !invariant.load [[META0]]
-; CHECK-NEXT: ret float [[RES]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 -8
%sel1 = select i1 %cmp2, i64 %sel, i64 -16
- %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
- %res = load float, ptr %p, align 4
- ret float %res
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1)
+ ret ptr %p
}
-define float @align_ptrmask_forward_nonconst_mask(ptr align 8 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
-; CHECK-LABEL: define float @align_ptrmask_forward_nonconst_mask(
-; CHECK-SAME: ptr nofree readonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+define ptr @align_ptrmask_forward_nonconst_mask(ptr align 8 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define align 8 ptr @align_ptrmask_forward_nonconst_mask(
+; CHECK-SAME: ptr nofree readnone align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]]
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR2]]
-; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0]]
-; CHECK-NEXT: ret float [[RES]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 %y
%sel1 = select i1 %cmp2, i64 %sel, i64 -16
- %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
- %res = load float, ptr %p, align 4
- ret float %res
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1)
+ ret ptr %p
}
-define float @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
-; CHECK-LABEL: define float @align_ptrmask_back_nonconst_mask(
-; CHECK-SAME: ptr nofree readonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+define ptr @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_nonconst_mask(
+; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]]
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR2]]
-; CHECK-NEXT: [[RES:%.*]] = load float, ptr [[P]], align 8, !invariant.load [[META0]]
-; CHECK-NEXT: ret float [[RES]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
+; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 %y
%sel1 = select i1 %cmp2, i64 %sel, i64 -16
- %p = tail call ptr @llvm.ptrmask(ptr %x, i64 %sel1)
- %res = load float, ptr %p, align 8
- ret float %res
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1)
+ store float 1.0, ptr %p, align 8
+ ret ptr %p
+}
+
+define ptr @align_ptrmask_back_const_back_noprop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_noprop(
+; CHECK-SAME: ptr nofree writeonly align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8)
+ store float 1.0, ptr %p, align 8
+ ret ptr %p
+}
+
+define ptr @align_ptrmask_back_const_back_prop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_prop(
+; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -2) #[[ATTR3]]
+; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -2)
+ store float 1.0, ptr %p, align 8
+ ret ptr %p
+}
+
+define ptr @align_ptrmask_back_const_forward_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define align 8 ptr @align_ptrmask_back_const_forward_mask(
+; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR3]]
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8)
+ ret ptr %p
+}
+
+define ptr @align_ptrmask_back_const_forward_ptr(ptr align 16 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
+; CHECK-LABEL: define align 16 ptr @align_ptrmask_back_const_forward_ptr(
+; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR3]]
+; CHECK-NEXT: ret ptr [[P]]
+;
+ %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8)
+ ret ptr %p
}
-;.
-; CHECK: [[META0]] = !{}
-;.
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index 143cc3817bd08..e24a4814ab64c 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -482,7 +482,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("ascii")
+ readobj_out = readobj_cmd.stdout.read().decode("utf-8")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From 8091ffb5eda7572c9d914cbf8f86bdf9965e9969 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 28 Jul 2025 13:35:33 +0800
Subject: [PATCH 03/10] fix lit
---
llvm/test/lit.cfg.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py
index e24a4814ab64c..143cc3817bd08 100644
--- a/llvm/test/lit.cfg.py
+++ b/llvm/test/lit.cfg.py
@@ -482,7 +482,7 @@ def have_cxx_shared_library():
print("could not exec llvm-readobj")
return False
- readobj_out = readobj_cmd.stdout.read().decode("utf-8")
+ readobj_out = readobj_cmd.stdout.read().decode("ascii")
readobj_cmd.wait()
regex = re.compile(r"(libc\+\+|libstdc\+\+|msvcp).*\.(so|dylib|dll)")
>From 7b1abf243daceb3e73969f1ca6a375c0f96c1d32 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Mon, 28 Jul 2025 14:14:51 +0800
Subject: [PATCH 04/10] fix test cases
---
llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index e1b634eb328a7..fb070c4dcf2dc 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5256,6 +5256,7 @@ getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
if (Alignment != 0) {
return Alignment;
}
+ return QueryingAA.getAssumedAlign().value();
break;
}
default:
@@ -5594,7 +5595,6 @@ struct AAAlignCallSiteReturned final
return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
- return ChangeStatus::UNCHANGED;
}
return Base::updateImpl(A);
};
>From 258238f6905b696dd0dbc58de59dd219146b2fb6 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 29 Jul 2025 09:57:37 +0800
Subject: [PATCH 05/10] add test case not propagate extractelement
---
.../Transforms/Attributor/align-ptrmask.ll | 20 +++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/llvm/test/Transforms/Attributor/align-ptrmask.ll b/llvm/test/Transforms/Attributor/align-ptrmask.ll
index 117e551f6ea47..2c41ef62e2a31 100644
--- a/llvm/test/Transforms/Attributor/align-ptrmask.ll
+++ b/llvm/test/Transforms/Attributor/align-ptrmask.ll
@@ -134,3 +134,23 @@ define ptr @align_ptrmask_back_const_forward_ptr(ptr align 16 %x, i64 %y, i1 %cm
%p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8)
ret ptr %p
}
+
+; FIXME: The store creates an AAAlign for %ptr1, but the alignment is not yet
+; propagated through extractelement; that propagation still needs to be implemented.
+define <2 x ptr> @ptrmask_v2p0_v2i64(<2 x ptr> align 2 %ptr, i64 %a) {
+; CHECK-LABEL: define <2 x ptr> @ptrmask_v2p0_v2i64(
+; CHECK-SAME: <2 x ptr> align 2 [[PTR:%.*]], i64 [[A:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[RESULT:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PTR]], <2 x i64> noundef splat (i64 -8)) #[[ATTR4]]
+; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x ptr> [[RESULT]], i32 0
+; CHECK-NEXT: [[PTR2:%.*]] = extractelement <2 x ptr> [[RESULT]], i32 1
+; CHECK-NEXT: store i64 [[A]], ptr [[PTR1]], align 16
+; CHECK-NEXT: store i64 [[A]], ptr [[PTR2]], align 16
+; CHECK-NEXT: ret <2 x ptr> [[RESULT]]
+;
+ %result = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> splat(i64 -8))
+ %ptr1 = extractelement <2 x ptr> %result, i32 0
+ %ptr2 = extractelement <2 x ptr> %result, i32 1
+ store i64 %a, ptr %ptr1, align 16
+ store i64 %a, ptr %ptr2, align 16
+ ret <2 x ptr> %result
+}
>From 88789839f1c92f56a9ca993adb186f093889a747 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 29 Jul 2025 10:40:01 +0800
Subject: [PATCH 06/10] fix test case
---
.../Transforms/Attributor/align-ptrmask.ll | 20 +++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/llvm/test/Transforms/Attributor/align-ptrmask.ll b/llvm/test/Transforms/Attributor/align-ptrmask.ll
index 2c41ef62e2a31..defe3e3374b78 100644
--- a/llvm/test/Transforms/Attributor/align-ptrmask.ll
+++ b/llvm/test/Transforms/Attributor/align-ptrmask.ll
@@ -6,7 +6,7 @@ define ptr @align_ptrmask_back_no_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
; CHECK-SAME: ptr nofree writeonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
; CHECK-NEXT: ret ptr [[P]]
;
@@ -22,7 +22,7 @@ define ptr @align_ptrmask_back_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
; CHECK-SAME: ptr nofree writeonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 16 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 16 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 16
; CHECK-NEXT: ret ptr [[P]]
;
@@ -38,7 +38,7 @@ define ptr @align_ptrmask_forward_mask(ptr align 2 %x, i1 %cmp1, i1 %cmp2) {
; CHECK-SAME: ptr nofree readnone align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]]
; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 -8
@@ -52,7 +52,7 @@ define ptr @align_ptrmask_forward_ptr(ptr align 16 %x, i1 %cmp1, i1 %cmp2) {
; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]]
; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 -8
@@ -66,7 +66,7 @@ define ptr @align_ptrmask_forward_nonconst_mask(ptr align 8 %x, i64 %y, i1 %cmp1
; CHECK-SAME: ptr nofree readnone align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]]
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR4]]
; CHECK-NEXT: ret ptr [[P]]
;
%sel = select i1 %cmp1, i64 -32, i64 %y
@@ -80,7 +80,7 @@ define ptr @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i
; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]]
; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16
-; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR4]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
; CHECK-NEXT: ret ptr [[P]]
;
@@ -94,7 +94,7 @@ define ptr @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i
define ptr @align_ptrmask_back_const_back_noprop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_noprop(
; CHECK-SAME: ptr nofree writeonly align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
; CHECK-NEXT: ret ptr [[P]]
;
@@ -106,7 +106,7 @@ define ptr @align_ptrmask_back_const_back_noprop(ptr align 4 %x, i64 %y, i1 %cmp
define ptr @align_ptrmask_back_const_back_prop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_prop(
; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -2) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -2) #[[ATTR4]]
; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8
; CHECK-NEXT: ret ptr [[P]]
;
@@ -118,7 +118,7 @@ define ptr @align_ptrmask_back_const_back_prop(ptr align 4 %x, i64 %y, i1 %cmp1,
define ptr @align_ptrmask_back_const_forward_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
; CHECK-LABEL: define align 8 ptr @align_ptrmask_back_const_forward_mask(
; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]]
; CHECK-NEXT: ret ptr [[P]]
;
%p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8)
@@ -128,7 +128,7 @@ define ptr @align_ptrmask_back_const_forward_mask(ptr align 4 %x, i64 %y, i1 %cm
define ptr @align_ptrmask_back_const_forward_ptr(ptr align 16 %x, i64 %y, i1 %cmp1, i1 %cmp2) {
; CHECK-LABEL: define align 16 ptr @align_ptrmask_back_const_forward_ptr(
; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR3]]
+; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]]
; CHECK-NEXT: ret ptr [[P]]
;
%p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8)
>From ebbfc0b67720923f4b6a082ae22531b34eb2260c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Thu, 31 Jul 2025 08:58:06 +0800
Subject: [PATCH 07/10] fix comments
---
.../Transforms/IPO/AttributorAttributes.cpp | 52 +++++++++----------
1 file changed, 24 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index fb070c4dcf2dc..24460b4db076d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5189,14 +5189,14 @@ struct AADereferenceableCallSiteReturned final
namespace {
-static std::optional<uint64_t>
-getKnownAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
- const IntrinsicInst *II) {
+static std::optional<Align> getKnownAlignForIntrinsic(Attributor &A,
+ AAAlign &QueryingAA,
+ const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
case Intrinsic::ptrmask: {
- auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
+ const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
QueryingAA, IRPosition::value(*(II->getOperand(1))), DepClassTy::NONE);
- const AAAlign *AlignAA = A.getAAFor<AAAlign>(
+ const auto *AlignAA = A.getAAFor<AAAlign>(
QueryingAA, IRPosition::value(*(II)), DepClassTy::NONE);
if (ConstVals && ConstVals->isValidState()) {
if (ConstVals->isAtFixpoint()) {
@@ -5204,19 +5204,19 @@ getKnownAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
unsigned Size =
DL.getPointerTypeSizeInBits(II->getOperand(0)->getType());
uint64_t TrailingZeros = Size;
- for (const auto &It : ConstVals->getAssumedSet())
+ for (const APInt &It : ConstVals->getAssumedSet())
if (It.countTrailingZeros() < TrailingZeros)
TrailingZeros = It.countTrailingZeros();
if (TrailingZeros < Size) {
uint64_t Mask = 1 << TrailingZeros;
if (Mask >= AlignAA->getKnownAlign().value()) {
- return 0;
+ return Align(1);
}
}
- return AlignAA->getKnownAlign().value();
+ return AlignAA->getKnownAlign();
}
} else if (AlignAA) {
- return AlignAA->getKnownAlign().value();
+ return AlignAA->getKnownAlign();
}
break;
}
@@ -5226,16 +5226,15 @@ getKnownAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
return std::nullopt;
}
-static std::optional<uint64_t>
+static std::optional<Align>
getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
case Intrinsic::ptrmask: {
- const AAPotentialConstantValues *ConstVals =
- A.getAAFor<AAPotentialConstantValues>(
- QueryingAA, IRPosition::value(*(II->getOperand(1))),
- DepClassTy::REQUIRED);
- const AAAlign *AlignAA =
+ const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
+ QueryingAA, IRPosition::value(*(II->getOperand(1))),
+ DepClassTy::REQUIRED);
+ const auto *AlignAA =
A.getAAFor<AAAlign>(QueryingAA, IRPosition::value(*(II->getOperand(0))),
DepClassTy::REQUIRED);
uint64_t Alignment = 0;
@@ -5243,7 +5242,7 @@ getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
const DataLayout &DL = A.getDataLayout();
unsigned Size = DL.getPointerTypeSizeInBits(II->getOperand(0)->getType());
unsigned TrailingZeros = Size;
- for (const auto &It : ConstVals->getAssumedSet())
+ for (const APInt &It : ConstVals->getAssumedSet())
if (It.countTrailingZeros() < TrailingZeros)
TrailingZeros = It.countTrailingZeros();
if (TrailingZeros < Size)
@@ -5253,11 +5252,9 @@ getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
Alignment < AlignAA->getAssumedAlign().value())
Alignment = AlignAA->getAssumedAlign().value();
- if (Alignment != 0) {
- return Alignment;
- }
- return QueryingAA.getAssumedAlign().value();
- break;
+ if (Alignment != 0)
+ return Align(Alignment);
+ return QueryingAA.getAssumedAlign();
}
default:
break;
@@ -5280,11 +5277,10 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- std::optional<uint64_t> Align =
- getKnownAlignForIntrinsic(A, QueryingAA, II);
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ std::optional<Align> Align = getKnownAlignForIntrinsic(A, QueryingAA, II);
if (Align.has_value())
- return Align.value();
+ return Align.value().value();
}
MaybeAlign MA;
@@ -5587,11 +5583,11 @@ struct AAAlignCallSiteReturned final
ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = getIRPosition().getCtxI();
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
- std::optional<uint64_t> Align = getAssumedAlignForIntrinsic(A, *this, II);
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ std::optional<Align> Align = getAssumedAlignForIntrinsic(A, *this, II);
if (Align.has_value()) {
uint64_t OldAssumed = getAssumed();
- takeAssumedMinimum(Align.value());
+ takeAssumedMinimum(Align.value().value());
return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
: ChangeStatus::CHANGED;
}
>From 47dc43473f90704ff331b8eb3b36df93d0f69237 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Fri, 1 Aug 2025 14:59:29 +0800
Subject: [PATCH 08/10] fix matthew's comments
---
.../Transforms/IPO/AttributorAttributes.cpp | 33 ++++++++++---------
1 file changed, 18 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 24460b4db076d..0b4e0b0ad1fc8 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5203,16 +5203,16 @@ static std::optional<Align> getKnownAlignForIntrinsic(Attributor &A,
const DataLayout &DL = A.getDataLayout();
unsigned Size =
DL.getPointerTypeSizeInBits(II->getOperand(0)->getType());
- uint64_t TrailingZeros = Size;
+ uint64_t TrailingZeros = Size - 1;
for (const APInt &It : ConstVals->getAssumedSet())
if (It.countTrailingZeros() < TrailingZeros)
TrailingZeros = It.countTrailingZeros();
- if (TrailingZeros < Size) {
- uint64_t Mask = 1 << TrailingZeros;
- if (Mask >= AlignAA->getKnownAlign().value()) {
- return Align(1);
- }
- }
+
+ APInt Mask = APInt(Size, 1).shl(TrailingZeros);
+ APInt PtrAlign = APInt(Size, 1).shl(Log2(AlignAA->getKnownAlign()));
+ if (Mask.uge(PtrAlign))
+ return Align(1);
+
return AlignAA->getKnownAlign();
}
} else if (AlignAA) {
@@ -5237,22 +5237,25 @@ getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
const auto *AlignAA =
A.getAAFor<AAAlign>(QueryingAA, IRPosition::value(*(II->getOperand(0))),
DepClassTy::REQUIRED);
- uint64_t Alignment = 0;
+ Align Alignment;
+ bool NeedRet = false;
if (ConstVals && ConstVals->isValidState()) {
const DataLayout &DL = A.getDataLayout();
unsigned Size = DL.getPointerTypeSizeInBits(II->getOperand(0)->getType());
- unsigned TrailingZeros = Size;
+ unsigned TrailingZeros = Size - 1;
for (const APInt &It : ConstVals->getAssumedSet())
if (It.countTrailingZeros() < TrailingZeros)
TrailingZeros = It.countTrailingZeros();
- if (TrailingZeros < Size)
- Alignment = 1 << TrailingZeros;
+ Alignment = Align(1 << TrailingZeros);
+ NeedRet = true;
+ }
+ if (AlignAA && AlignAA->isValidState()) {
+ NeedRet = true;
+ if (Alignment < AlignAA->getAssumedAlign())
+ Alignment = AlignAA->getAssumedAlign();
}
- if (AlignAA && AlignAA->isValidState() &&
- Alignment < AlignAA->getAssumedAlign().value())
- Alignment = AlignAA->getAssumedAlign().value();
- if (Alignment != 0)
+ if (NeedRet)
return Align(Alignment);
return QueryingAA.getAssumedAlign();
}
>From 9985f5121b04b4d2a4fd65cde9664679d3bf78e1 Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Tue, 5 Aug 2025 18:02:17 +0800
Subject: [PATCH 09/10] try remove optional
---
.../Transforms/IPO/AttributorAttributes.cpp | 35 +--
.../X86/min-legal-vector-width.ll | 246 ++++++++++++------
2 files changed, 177 insertions(+), 104 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 4c962dc613288..8cfe14d326a1e 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -5189,9 +5189,8 @@ struct AADereferenceableCallSiteReturned final
namespace {
-static std::optional<Align> getKnownAlignForIntrinsic(Attributor &A,
- AAAlign &QueryingAA,
- const IntrinsicInst *II) {
+Align getKnownAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
+ const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
case Intrinsic::ptrmask: {
const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
@@ -5223,12 +5222,12 @@ static std::optional<Align> getKnownAlignForIntrinsic(Attributor &A,
default:
break;
}
- return std::nullopt;
+ return Align(1);
}
-static std::optional<Align>
-getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
- const IntrinsicInst *II) {
+Align getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
+ const IntrinsicInst *II) {
+ Align Alignment;
switch (II->getIntrinsicID()) {
case Intrinsic::ptrmask: {
const auto *ConstVals = A.getAAFor<AAPotentialConstantValues>(
@@ -5237,7 +5236,6 @@ getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
const auto *AlignAA =
A.getAAFor<AAAlign>(QueryingAA, IRPosition::value(*(II->getOperand(0))),
DepClassTy::REQUIRED);
- Align Alignment;
bool NeedRet = false;
if (ConstVals && ConstVals->isValidState()) {
const DataLayout &DL = A.getDataLayout();
@@ -5262,7 +5260,7 @@ getAssumedAlignForIntrinsic(Attributor &A, AAAlign &QueryingAA,
default:
break;
}
- return std::nullopt;
+ return Alignment;
}
static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
@@ -5280,11 +5278,8 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA,
TrackUse = true;
return 0;
}
- if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- std::optional<Align> Align = getKnownAlignForIntrinsic(A, QueryingAA, II);
- if (Align.has_value())
- return Align.value().value();
- }
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return getKnownAlignForIntrinsic(A, QueryingAA, II).value();
MaybeAlign MA;
if (const auto *CB = dyn_cast<CallBase>(I)) {
@@ -5587,13 +5582,11 @@ struct AAAlignCallSiteReturned final
ChangeStatus updateImpl(Attributor &A) override {
Instruction *I = getIRPosition().getCtxI();
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
- std::optional<Align> Align = getAssumedAlignForIntrinsic(A, *this, II);
- if (Align.has_value()) {
- uint64_t OldAssumed = getAssumed();
- takeAssumedMinimum(Align.value().value());
- return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
- : ChangeStatus::CHANGED;
- }
+ Align Align = getAssumedAlignForIntrinsic(A, *this, II);
+ uint64_t OldAssumed = getAssumed();
+ takeAssumedMinimum(Align.value());
+ return OldAssumed == getAssumed() ? ChangeStatus::UNCHANGED
+ : ChangeStatus::CHANGED;
}
return Base::updateImpl(A);
};
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 649e9467c0318..cce5caab86276 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -9,15 +9,25 @@ target triple = "x86_64-unknown-linux-gnu"
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0:![0-9]+]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -33,7 +43,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5:[0-9]+]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6:[0-9]+]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
@@ -66,15 +76,25 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -90,7 +110,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
@@ -123,15 +143,25 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -147,7 +177,7 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
@@ -180,15 +210,25 @@ bb:
; This should promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -204,7 +244,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
@@ -237,13 +277,21 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -259,7 +307,7 @@ define void @avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: call fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[TMP]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -290,13 +338,21 @@ bb:
; This should not promote
define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -312,7 +368,7 @@ define void @avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg)
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[TMP]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
; TUNIT-NEXT: store <8 x i64> [[TMP4]], ptr [[ARG]], align 2
@@ -343,15 +399,25 @@ bb:
; This should promote
define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -367,7 +433,7 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg) #4 {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; TUNIT-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
@@ -400,15 +466,25 @@ bb:
; This should promote
define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 {
;
-; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
-; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT: bb:
-; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
-; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
-; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
-; CHECK-NEXT: ret void
+; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; TUNIT-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT: bb:
+; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64
+; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; TUNIT-NEXT: ret void
+;
+; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable
+; CGSCC-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT: bb:
+; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64
+; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64
+; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]]
+; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64
+; CGSCC-NEXT: ret void
;
bb:
%tmp = load <8 x i64>, ptr %arg1
@@ -424,7 +500,7 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg) #3 {
; TUNIT-NEXT: bb:
; TUNIT-NEXT: [[TMP:%.*]] = alloca <8 x i64>, align 32
; TUNIT-NEXT: [[TMP2:%.*]] = alloca <8 x i64>, align 32
-; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 32 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
+; TUNIT-NEXT: call void @llvm.memset.p0.i64(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR5]]
; TUNIT-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[TMP]], align 64
; TUNIT-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR6]]
; TUNIT-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr [[TMP2]], align 64
@@ -464,6 +540,14 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2
attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" }
attributes #5 = { argmemonly nounwind }
;.
+; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
+; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
+; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
+; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
+; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
+; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn }
+;.
; TUNIT: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
; TUNIT: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
; TUNIT: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
@@ -472,11 +556,7 @@ attributes #5 = { argmemonly nounwind }
; TUNIT: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn }
;.
-; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" }
-; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" }
-; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" }
-; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" }
-; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
-; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) }
-; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn }
+; CGSCC: [[META0]] = !{}
;.
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
>From 3d8aaad6f5d4f126a5f0fe12431468668bf4094c Mon Sep 17 00:00:00 2001
From: shore <372660931 at qq.com>
Date: Wed, 6 Aug 2025 11:59:13 +0800
Subject: [PATCH 10/10] fix test case
---
llvm/test/Transforms/OpenMP/parallel_deletion.ll | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
index 3e16d96eb482b..2aac82e28b418 100644
--- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll
@@ -282,7 +282,7 @@ define void @delete_parallel_2() {
; CHECK-LABEL: define {{[^@]+}}@delete_parallel_2() {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 dereferenceable(4) [[A]]) #[[ATTR18:[0-9]+]]
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull dereferenceable(4) [[A]]) #[[ATTR18:[0-9]+]]
; CHECK-NEXT: store i32 0, ptr [[A]], align 4
; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..3, ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]])
; CHECK-NEXT: call void (ptr, i32, ptr, ...) @__kmpc_fork_call(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 noundef 1, ptr noundef nonnull @.omp_outlined..4, ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]])
@@ -531,7 +531,7 @@ define internal void @.omp_outlined..6(ptr noalias %.global_tid., ptr noalias %.
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A1:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x ptr], align 8
-; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull align 4 [[A1]]) #[[ATTR20:[0-9]+]]
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 4, ptr noundef nonnull [[A1]]) #[[ATTR20:[0-9]+]]
; CHECK-NEXT: store i32 1, ptr [[A1]], align 4
; CHECK-NEXT: store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8
; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
More information about the llvm-commits mailing list