[llvm] [LV] Fold isPredicatedInst into isMaskRequired (PR #134261)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Tue May 13 02:39:42 PDT 2025
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/134261
From d8bc3a7911f2987f4e9b58c393f2ea610c2e052e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 3 Apr 2025 14:49:08 +0100
Subject: [PATCH 1/2] [LV] Fold isPredicatedInst into isMaskRequired
Fold LoopVectorizationCostModel::isPredicatedInst into
LoopVectorizationLegality::isMaskRequired, fixing a pending TODO item.
Note that we still need to pass the cost model's decision about whether
we're folding the tail by masking into isMaskRequired.
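
Call sites that previously asked the cost model via isPredicatedInst now
go through the single legality query, passing the tail-folding decision
explicitly. As a minimal sketch of the resulting call-site pattern
(lifted from the tryToWidenMemory change below, shown here only for
illustration):

  VPValue *Mask = nullptr;
  if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
    Mask = getBlockInMask(Builder.getInsertBlock());

The only test affected is veclib-intrinsic-calls.ll, where the masked
SVE library variants are now called with an all-true mask
(splat (i1 true)) instead of the loop's active lane mask.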
---
.../Vectorize/LoopVectorizationLegality.h | 7 +-
.../Transforms/Vectorize/LoopVectorize.cpp | 75 +++++-----
.../AArch64/veclib-intrinsic-calls.ll | 136 +++++++++---------
3 files changed, 107 insertions(+), 111 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..1cb901cd9518e 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -418,11 +418,12 @@ class LoopVectorizationLegality {
return LAI->getDepChecker().getStoreLoadForwardSafeDistanceInBits();
}
+ /// Returns true if MaskedOp contains \p I.
+ bool isMasked(Instruction *I) const { return MaskedOp.contains(I); }
+
/// Returns true if the vector representation of the instruction \p I
/// requires a mask.
- bool isMaskRequired(const Instruction *I) const {
- return MaskedOp.contains(I);
- }
+ bool isMaskRequired(Instruction *I, bool FoldTailByMasking) const;
/// Returns true if there is at least one function call in the loop which
/// has a vectorized variant available.
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2393ac7182dfd..2717adc8178dd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1292,11 +1292,6 @@ class LoopVectorizationCostModel {
/// \p VF is the vectorization factor that will be used to vectorize \p I.
bool isScalarWithPredication(Instruction *I, ElementCount VF) const;
- /// Returns true if \p I is an instruction that needs to be predicated
- /// at runtime. The result is independent of the predication mechanism.
- /// Superset of instructions that return true for isScalarWithPredication.
- bool isPredicatedInst(Instruction *I) const;
-
/// Return the costs for our two available strategies for lowering a
/// div/rem operation which requires speculating at least one lane.
/// First result is for scalarization (will be invalid for scalable
@@ -3018,7 +3013,7 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
bool LoopVectorizationCostModel::isScalarWithPredication(
Instruction *I, ElementCount VF) const {
- if (!isPredicatedInst(I))
+ if (!Legal->isMaskRequired(I, foldTailByMasking()))
return false;
// Do we have a non-scalar lowering for this predicated
@@ -3057,22 +3052,20 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
}
}
-// TODO: Fold into LoopVectorizationLegality::isMaskRequired.
-bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
- // TODO: We can use the loop-preheader as context point here and get
- // context sensitive reasoning for isSafeToSpeculativelyExecute.
- if (isSafeToSpeculativelyExecute(I) ||
- (isa<LoadInst, StoreInst, CallInst>(I) && !Legal->isMaskRequired(I)) ||
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+ bool FoldTailByMasking) const {
+ if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
+ (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
return false;
// If the instruction was executed conditionally in the original scalar loop,
// predication is needed with a mask whose lanes are all possibly inactive.
- if (Legal->blockNeedsPredication(I->getParent()))
+ if (blockNeedsPredication(I->getParent()))
return true;
- // If we're not folding the tail by masking, predication is unnecessary.
- if (!foldTailByMasking())
+ // If we're not folding the tail by masking, bail out now.
+ if (!FoldTailByMasking)
return false;
// All that remain are instructions with side-effects originally executed in
@@ -3080,25 +3073,25 @@ bool LoopVectorizationCostModel::isPredicatedInst(Instruction *I) const {
// having at least one active lane (the first). If the side-effects of the
// instruction are invariant, executing it w/o (the tail-folding) mask is safe
// - it will cause the same side-effects as when masked.
- switch(I->getOpcode()) {
+ switch (I->getOpcode()) {
default:
llvm_unreachable(
"instruction should have been considered by earlier checks");
case Instruction::Call:
// Side-effects of a Call are assumed to be non-invariant, needing a
// (fold-tail) mask.
- assert(Legal->isMaskRequired(I) &&
+ assert(MaskedOp.contains(I) &&
"should have returned earlier for calls not needing a mask");
return true;
case Instruction::Load:
// If the address is loop invariant no predication is needed.
- return !Legal->isInvariant(getLoadStorePointerOperand(I));
+ return !isInvariant(getLoadStorePointerOperand(I));
case Instruction::Store: {
// For stores, we need to prove not only speculation safety (which follows
// from the same argument as loads), but also that the value being stored
// is correct. The easiest form of the latter is to require that all values
// stored are the same.
- return !(Legal->isInvariant(getLoadStorePointerOperand(I)) &&
+ return !(isInvariant(getLoadStorePointerOperand(I)) &&
TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
}
case Instruction::UDiv:
@@ -3222,7 +3215,7 @@ bool LoopVectorizationCostModel::interleavedAccessCanBeWidened(
// load, or any gaps in a store-access).
bool PredicatedAccessRequiresMasking =
blockNeedsPredicationForAnyReason(I->getParent()) &&
- Legal->isMaskRequired(I);
+ Legal->isMaskRequired(I, foldTailByMasking());
bool LoadAccessWithGapsRequiresEpilogMasking =
isa<LoadInst>(I) && Group->requiresScalarEpilogue() &&
!isScalarEpilogueAllowed();
@@ -3312,7 +3305,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
<< *I << "\n");
return;
}
- if (isPredicatedInst(I)) {
+ if (Legal->isMaskRequired(I, foldTailByMasking())) {
LLVM_DEBUG(
dbgs() << "LV: Found not uniform due to requiring predication: " << *I
<< "\n");
@@ -5450,7 +5443,7 @@ bool LoopVectorizationCostModel::useEmulatedMaskMemRefHack(Instruction *I,
// from moving "masked load/store" check from legality to cost model.
// Masked Load/Gather emulation was previously never allowed.
// Limited number of Masked Store/Scatter emulation was allowed.
- assert((isPredicatedInst(I)) &&
+ assert((Legal->isMaskRequired(I, foldTailByMasking())) &&
"Expecting a scalar emulated instruction");
return isa<LoadInst>(I) ||
(isa<StoreInst>(I) &&
@@ -5752,7 +5745,7 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
// If we have a predicated load/store, it will need extra i1 extracts and
// conditional branches, but may not be executed for each vector lane. Scale
// the cost by the probability of executing the predicated block.
- if (isPredicatedInst(I)) {
+ if (Legal->isMaskRequired(I, foldTailByMasking())) {
Cost /= getPredBlockCostDivisor(CostKind);
// Add the cost of an i1 extract and a branch
@@ -5785,7 +5778,7 @@ LoopVectorizationCostModel::getConsecutiveMemOpCost(Instruction *I,
"Stride should be 1 or -1 for consecutive memory access");
const Align Alignment = getLoadStoreAlignment(I);
InstructionCost Cost = 0;
- if (Legal->isMaskRequired(I)) {
+ if (Legal->isMaskRequired(I, foldTailByMasking())) {
Cost += TTI.getMaskedMemoryOpCost(I->getOpcode(), VectorTy, Alignment, AS,
CostKind);
} else {
@@ -5838,9 +5831,10 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
const Value *Ptr = getLoadStorePointerOperand(I);
return TTI.getAddressComputationCost(VectorTy) +
- TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
- Legal->isMaskRequired(I), Alignment,
- CostKind, I);
+ TTI.getGatherScatterOpCost(
+ I->getOpcode(), VectorTy, Ptr,
+ Legal->isMaskRequired(I, foldTailByMasking()), Alignment, CostKind,
+ I);
}
InstructionCost
@@ -5869,12 +5863,12 @@ LoopVectorizationCostModel::getInterleaveGroupCost(Instruction *I,
(isa<StoreInst>(I) && (Group->getNumMembers() < Group->getFactor()));
InstructionCost Cost = TTI.getInterleavedMemoryOpCost(
InsertPos->getOpcode(), WideVecTy, Group->getFactor(), Indices,
- Group->getAlign(), AS, CostKind, Legal->isMaskRequired(I),
- UseMaskForGaps);
+ Group->getAlign(), AS, CostKind,
+ Legal->isMaskRequired(I, foldTailByMasking()), UseMaskForGaps);
if (Group->isReverse()) {
// TODO: Add support for reversed masked interleaved access.
- assert(!Legal->isMaskRequired(I) &&
+ assert(!Legal->isMaskRequired(I, foldTailByMasking()) &&
"Reverse masked interleaved access not supported.");
Cost += Group->getNumMembers() *
TTI.getShuffleCost(TargetTransformInfo::SK_Reverse, VectorTy, {},
@@ -6367,7 +6361,7 @@ void LoopVectorizationCostModel::setVectorizedCallDecision(ElementCount VF) {
continue;
}
- bool MaskRequired = Legal->isMaskRequired(CI);
+ bool MaskRequired = Legal->isMasked(CI);
// Compute corresponding vector type for return value and arguments.
Type *RetTy = toVectorizedTy(ScalarRetTy, VF);
for (Type *ScalarTy : ScalarTys)
@@ -6487,7 +6481,7 @@ bool LoopVectorizationCostModel::shouldConsiderInvariant(Value *Op) {
// instruction in the loop. In that case, it is not trivially hoistable.
auto *OpI = dyn_cast<Instruction>(Op);
return !OpI || !TheLoop->contains(OpI) ||
- (!isPredicatedInst(OpI) &&
+ (!Legal->isMaskRequired(OpI, foldTailByMasking()) &&
(!isa<PHINode>(OpI) || OpI->getParent() != TheLoop->getHeader()) &&
all_of(OpI->operands(),
[this](Value *Op) { return shouldConsiderInvariant(Op); }));
@@ -6675,7 +6669,7 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::SDiv:
case Instruction::URem:
case Instruction::SRem:
- if (VF.isVector() && isPredicatedInst(I)) {
+ if (VF.isVector() && Legal->isMaskRequired(I, foldTailByMasking())) {
const auto [ScalarCost, SafeDivisorCost] = getDivRemSpeculationCost(I, VF);
return isDivRemScalarWithPredication(ScalarCost, SafeDivisorCost) ?
ScalarCost : SafeDivisorCost;
@@ -6859,8 +6853,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return TTI::CastContextHint::Interleave;
case LoopVectorizationCostModel::CM_Scalarize:
case LoopVectorizationCostModel::CM_Widen:
- return Legal->isMaskRequired(I) ? TTI::CastContextHint::Masked
- : TTI::CastContextHint::Normal;
+ return Legal->isMaskRequired(I, foldTailByMasking())
+ ? TTI::CastContextHint::Masked
+ : TTI::CastContextHint::Normal;
case LoopVectorizationCostModel::CM_Widen_Reverse:
return TTI::CastContextHint::Reversed;
case LoopVectorizationCostModel::CM_Unknown:
@@ -8417,7 +8412,7 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
return nullptr;
VPValue *Mask = nullptr;
- if (Legal->isMaskRequired(I))
+ if (Legal->isMaskRequired(I, CM.foldTailByMasking()))
Mask = getBlockInMask(Builder.getInsertBlock());
// Determine if the pointer operand of the access is either consecutive or
@@ -8644,7 +8639,7 @@ VPSingleDefRecipe *VPRecipeBuilder::tryToWidenCall(CallInst *CI,
// vector variant at this VF requires a mask, so we synthesize an
// all-true mask.
VPValue *Mask = nullptr;
- if (Legal->isMaskRequired(CI))
+ if (Legal->isMaskRequired(CI, CM.foldTailByMasking()))
Mask = getBlockInMask(Builder.getInsertBlock());
else
Mask = Plan.getOrAddLiveIn(
@@ -8685,7 +8680,7 @@ VPWidenRecipe *VPRecipeBuilder::tryToWiden(Instruction *I,
case Instruction::URem: {
// If not provably safe, use a select to form a safe divisor before widening the
// div/rem operation itself. Otherwise fall through to general handling below.
- if (CM.isPredicatedInst(I)) {
+ if (Legal->isMaskRequired(I, CM.foldTailByMasking())) {
SmallVector<VPValue *> Ops(Operands);
VPValue *Mask = getBlockInMask(Builder.getInsertBlock());
VPValue *One =
@@ -8768,7 +8763,7 @@ VPRecipeBuilder::tryToWidenHistogram(const HistogramInfo *HI,
// In case of predicated execution (due to tail-folding, or conditional
// execution, or both), pass the relevant mask.
- if (Legal->isMaskRequired(HI->Store))
+ if (Legal->isMaskRequired(HI->Store, CM.foldTailByMasking()))
HGramOps.push_back(getBlockInMask(Builder.getInsertBlock()));
return new VPHistogramRecipe(Opcode, HGramOps, HI->Store->getDebugLoc());
@@ -8781,7 +8776,7 @@ VPRecipeBuilder::handleReplication(Instruction *I, ArrayRef<VPValue *> Operands,
[&](ElementCount VF) { return CM.isUniformAfterVectorization(I, VF); },
Range);
- bool IsPredicated = CM.isPredicatedInst(I);
+ bool IsPredicated = Legal->isMaskRequired(I, CM.foldTailByMasking());
// Even if the instruction is not marked as uniform, there are certain
// intrinsic calls that can be effectively treated as such, so we check for
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
index f753df32d9ebc..5d45940a10beb 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/veclib-intrinsic-calls.ll
@@ -22,7 +22,7 @@ define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @acos_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_acos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @acos_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
@@ -30,7 +30,7 @@ define void @acos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @acos_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1:[0-9]+]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svacos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -57,7 +57,7 @@ define void @acos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @acos_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_acosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @acos_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -65,7 +65,7 @@ define void @acos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @acos_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svacos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -95,7 +95,7 @@ define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @asin_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_asin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @asin_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -103,7 +103,7 @@ define void @asin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @asin_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svasin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -130,7 +130,7 @@ define void @asin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @asin_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_asinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @asin_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -138,7 +138,7 @@ define void @asin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @asin_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svasin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -168,7 +168,7 @@ define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @atan_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_atan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @atan_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -176,7 +176,7 @@ define void @atan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @atan_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svatan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -203,7 +203,7 @@ define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @atan_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_atanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @atan_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -211,7 +211,7 @@ define void @atan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @atan_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svatan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -241,7 +241,7 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @atan2_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_atan2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @atan2_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -249,7 +249,7 @@ define void @atan2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @atan2_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svatan2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -276,7 +276,7 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @atan2_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_atan2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @atan2_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -284,7 +284,7 @@ define void @atan2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @atan2_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svatan2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -460,7 +460,7 @@ define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @cos_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cos(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @cos_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -468,7 +468,7 @@ define void @cos_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @cos_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svcos_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -495,7 +495,7 @@ define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @cos_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_cosf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @cos_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -503,7 +503,7 @@ define void @cos_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @cos_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svcos_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -533,7 +533,7 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @cosh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_cosh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @cosh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -541,7 +541,7 @@ define void @cosh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @cosh_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svcosh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -568,7 +568,7 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @cosh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_coshf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @cosh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -576,7 +576,7 @@ define void @cosh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @cosh_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svcosh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -606,7 +606,7 @@ define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @exp_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @exp_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -614,7 +614,7 @@ define void @exp_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @exp_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svexp_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -641,7 +641,7 @@ define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @exp_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_expf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @exp_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -649,7 +649,7 @@ define void @exp_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @exp_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svexp_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -679,7 +679,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @exp10_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @exp10_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -687,7 +687,7 @@ define void @exp10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @exp10_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svexp10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -714,7 +714,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @exp10_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @exp10_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -722,7 +722,7 @@ define void @exp10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @exp10_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svexp10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -752,7 +752,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @exp2_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_exp2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @exp2_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -760,7 +760,7 @@ define void @exp2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @exp2_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svexp2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -787,7 +787,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @exp2_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_exp2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @exp2_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -795,7 +795,7 @@ define void @exp2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @exp2_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svexp2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1044,7 +1044,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @log_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @log_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1052,7 +1052,7 @@ define void @log_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @log_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svlog_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1079,7 +1079,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @log_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_logf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @log_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1087,7 +1087,7 @@ define void @log_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @log_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svlog_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1117,7 +1117,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @log10_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log10(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @log10_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1125,7 +1125,7 @@ define void @log10_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @log10_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svlog10_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1152,7 +1152,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @log10_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log10f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @log10_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1160,7 +1160,7 @@ define void @log10_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @log10_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svlog10_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1190,7 +1190,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @log2_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_log2(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @log2_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1198,7 +1198,7 @@ define void @log2_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @log2_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svlog2_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1225,7 +1225,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @log2_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_log2f(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @log2_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1233,7 +1233,7 @@ define void @log2_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @log2_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svlog2_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1482,7 +1482,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @pow_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxvv_pow(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @pow_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1490,7 +1490,7 @@ define void @pow_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @pow_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svpow_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x double> [[WIDE_MASKED_LOAD]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1517,7 +1517,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @pow_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxvv_powf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @pow_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1525,7 +1525,7 @@ define void @pow_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @pow_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svpow_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1701,7 +1701,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @sin_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sin(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @sin_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1709,7 +1709,7 @@ define void @sin_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @sin_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svsin_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1736,7 +1736,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @sin_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @sin_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1744,7 +1744,7 @@ define void @sin_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @sin_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svsin_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1774,7 +1774,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @sinh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_sinh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @sinh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1782,7 +1782,7 @@ define void @sinh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @sinh_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svsinh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1809,7 +1809,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @sinh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_sinhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @sinh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1817,7 +1817,7 @@ define void @sinh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @sinh_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svsinh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1920,7 +1920,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @tan_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tan(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @tan_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1928,7 +1928,7 @@ define void @tan_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @tan_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svtan_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1955,7 +1955,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @tan_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @tan_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -1963,7 +1963,7 @@ define void @tan_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @tan_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svtan_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -1993,7 +1993,7 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @tanh_f64
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @_ZGVsMxv_tanh(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @tanh_f64
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -2001,7 +2001,7 @@ define void @tanh_f64(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @tanh_f64
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 2 x double> @armpl_svtanh_f64_x(<vscale x 2 x double> [[WIDE_MASKED_LOAD:%.*]], <vscale x 2 x i1> splat (i1 true))
;
entry:
br label %for.body
@@ -2028,7 +2028,7 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; SLEEF-SVE-LABEL: define void @tanh_f32
; SLEEF-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; SLEEF-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @_ZGVsMxv_tanhf(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
; ARMPL-NEON-LABEL: define void @tanh_f32
; ARMPL-NEON-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
@@ -2036,7 +2036,7 @@ define void @tanh_f32(ptr noalias %in.ptr, ptr %out.ptr) {
;
; ARMPL-SVE-LABEL: define void @tanh_f32
; ARMPL-SVE-SAME: (ptr noalias [[IN_PTR:%.*]], ptr [[OUT_PTR:%.*]]) #[[ATTR1]] {
-; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK:%.*]])
+; ARMPL-SVE: [[TMP12:%.*]] = call <vscale x 4 x float> @armpl_svtanh_f32_x(<vscale x 4 x float> [[WIDE_MASKED_LOAD:%.*]], <vscale x 4 x i1> splat (i1 true))
;
entry:
br label %for.body
From d46f27977e64318c17ce463d72439f8ff88030fc Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Tue, 13 May 2025 10:35:24 +0100
Subject: [PATCH 2/2] [LV] Move isMaskRequired to LVLegality
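Move the implementation of LoopVectorizationLegality::isMaskRequired from
LoopVectorize.cpp into LoopVectorizationLegality.cpp, alongside the rest
of the class. The code is moved verbatim, with no functional change.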
---
.../Vectorize/LoopVectorizationLegality.cpp | 51 +++++++++++++++++++
.../Transforms/Vectorize/LoopVectorize.cpp | 51 -------------------
2 files changed, 51 insertions(+), 51 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 8e09e6f8d4935..360eb11992240 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -1408,6 +1408,57 @@ bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
+bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
+ bool FoldTailByMasking) const {
+ if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
+ (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
+ isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
+ return false;
+
+ // If the instruction was executed conditionally in the original scalar loop,
+ // predication is needed with a mask whose lanes are all possibly inactive.
+ if (blockNeedsPredication(I->getParent()))
+ return true;
+
+ // If we're not folding the tail by masking, bail out now.
+ if (!FoldTailByMasking)
+ return false;
+
+ // All that remain are instructions with side-effects originally executed in
+ // the loop unconditionally, but now execute under a tail-fold mask (only)
+ // having at least one active lane (the first). If the side-effects of the
+ // instruction are invariant, executing it w/o (the tail-folding) mask is safe
+ // - it will cause the same side-effects as when masked.
+ switch (I->getOpcode()) {
+ default:
+ llvm_unreachable(
+ "instruction should have been considered by earlier checks");
+ case Instruction::Call:
+ // Side-effects of a Call are assumed to be non-invariant, needing a
+ // (fold-tail) mask.
+ assert(MaskedOp.contains(I) &&
+ "should have returned earlier for calls not needing a mask");
+ return true;
+ case Instruction::Load:
+ // If the address is loop invariant no predication is needed.
+ return !isInvariant(getLoadStorePointerOperand(I));
+ case Instruction::Store: {
+ // For stores, we need to prove not only speculation safety (which follows
+ // from the same argument as loads), but also that the value being stored
+ // is correct. The easiest form of the latter is to require that all values
+ // stored are the same.
+ return !(isInvariant(getLoadStorePointerOperand(I)) &&
+ TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
+ }
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // If the divisor is loop-invariant no predication is needed.
+ return !TheLoop->isLoopInvariant(I->getOperand(1));
+ }
+}
+
bool LoopVectorizationLegality::blockCanBePredicated(
BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2717adc8178dd..e5ae9833bb342 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3052,57 +3052,6 @@ bool LoopVectorizationCostModel::isScalarWithPredication(
}
}
-bool LoopVectorizationLegality::isMaskRequired(Instruction *I,
- bool FoldTailByMasking) const {
- if (isSafeToSpeculativelyExecute(I, TheLoop->getLatchCmpInst()) ||
- (isa<LoadInst, StoreInst, CallInst>(I) && !MaskedOp.contains(I)) ||
- isa<BranchInst, SwitchInst, PHINode, AllocaInst>(I))
- return false;
-
- // If the instruction was executed conditionally in the original scalar loop,
- // predication is needed with a mask whose lanes are all possibly inactive.
- if (blockNeedsPredication(I->getParent()))
- return true;
-
- // If we're not folding the tail by masking, bail out now.
- if (!FoldTailByMasking)
- return false;
-
- // All that remain are instructions with side-effects originally executed in
- // the loop unconditionally, but now execute under a tail-fold mask (only)
- // having at least one active lane (the first). If the side-effects of the
- // instruction are invariant, executing it w/o (the tail-folding) mask is safe
- // - it will cause the same side-effects as when masked.
- switch (I->getOpcode()) {
- default:
- llvm_unreachable(
- "instruction should have been considered by earlier checks");
- case Instruction::Call:
- // Side-effects of a Call are assumed to be non-invariant, needing a
- // (fold-tail) mask.
- assert(MaskedOp.contains(I) &&
- "should have returned earlier for calls not needing a mask");
- return true;
- case Instruction::Load:
- // If the address is loop invariant no predication is needed.
- return !isInvariant(getLoadStorePointerOperand(I));
- case Instruction::Store: {
- // For stores, we need to prove not only speculation safety (which follows
- // from the same argument as loads), but also that the value being stored
- // is correct. The easiest form of the latter is to require that all values
- // stored are the same.
- return !(isInvariant(getLoadStorePointerOperand(I)) &&
- TheLoop->isLoopInvariant(cast<StoreInst>(I)->getValueOperand()));
- }
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::SRem:
- case Instruction::URem:
- // If the divisor is loop-invariant no predication is needed.
- return !TheLoop->isLoopInvariant(I->getOperand(1));
- }
-}
-
std::pair<InstructionCost, InstructionCost>
LoopVectorizationCostModel::getDivRemSpeculationCost(Instruction *I,
ElementCount VF) const {