[llvm] [IVDescriptors] Remove function FMF attribute check for FP min/max reduction (PR #183523)
Mel Chen via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 26 05:48:10 PST 2026
https://github.com/Mel-Chen updated https://github.com/llvm/llvm-project/pull/183523
>From ebf554312d5f87da4a4761dbea2e4270d5518476 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen at sifive.com>
Date: Thu, 26 Feb 2026 00:35:56 -0800
Subject: [PATCH] [IVDescriptors] Remove function FMF attribute check for FP
min/max reduction
Remove the use of function attributes "no-nans-fp-math" and
"no-signed-zeros-fp-math" in FP min/max reduction detection. The
required fast-math flags nnan and nsz should be present on the
intrinsic call, fcmp and select instructions themselves.
---
llvm/include/llvm/Analysis/IVDescriptors.h | 13 +-
llvm/lib/Analysis/IVDescriptors.cpp | 73 ++++--------
.../AArch64/scalable-reductions.ll | 17 ++-
.../LoopVectorize/RISCV/reductions.ll | 71 ++++++-----
.../RISCV/tail-folding-inloop-reduction.ll | 13 +-
.../RISCV/tail-folding-reduction.ll | 21 ++--
.../LoopVectorize/X86/reduction-fastmath.ll | 35 +++---
.../float-minmax-instruction-flag.ll | 37 +++---
.../LoopVectorize/minmax_reduction.ll | 112 +++++++++---------
9 files changed, 179 insertions(+), 213 deletions(-)
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index 592eb2e90c1c6..8383da0f9e879 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -151,9 +151,10 @@ class RecurrenceDescriptor {
/// advances the instruction pointer 'I' from the compare instruction to the
/// select instruction and stores this pointer in 'PatternLastInst' member of
/// the returned struct.
- LLVM_ABI static InstDesc
- isRecurrenceInstr(Loop *L, PHINode *Phi, Instruction *I, RecurKind Kind,
- InstDesc &Prev, FastMathFlags FuncFMF, ScalarEvolution *SE);
+ LLVM_ABI static InstDesc isRecurrenceInstr(Loop *L, PHINode *Phi,
+ Instruction *I, RecurKind Kind,
+ InstDesc &Prev,
+ ScalarEvolution *SE);
/// Returns true if instruction I has multiple uses in Insts
LLVM_ABI static bool hasMultipleUsesOf(Instruction *I,
@@ -195,9 +196,9 @@ class RecurrenceDescriptor {
/// computed.
LLVM_ABI static bool
AddReductionVar(PHINode *Phi, RecurKind Kind, Loop *TheLoop,
- FastMathFlags FuncFMF, RecurrenceDescriptor &RedDes,
- DemandedBits *DB = nullptr, AssumptionCache *AC = nullptr,
- DominatorTree *DT = nullptr, ScalarEvolution *SE = nullptr);
+ RecurrenceDescriptor &RedDes, DemandedBits *DB = nullptr,
+ AssumptionCache *AC = nullptr, DominatorTree *DT = nullptr,
+ ScalarEvolution *SE = nullptr);
/// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
/// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 349f23bde21d4..616265ac707bb 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -229,10 +229,8 @@ static FastMathFlags collectMinMaxFMF(Value *V) {
}
static std::optional<FastMathFlags>
-hasRequiredFastMathFlags(FPMathOperator *FPOp, RecurKind &RK,
- FastMathFlags FuncFMF) {
- bool HasRequiredFMF = (FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
- (FPOp && FPOp->hasNoNaNs() && FPOp->hasNoSignedZeros());
+hasRequiredFastMathFlags(FPMathOperator *FPOp, RecurKind &RK) {
+ bool HasRequiredFMF = FPOp && FPOp->hasNoNaNs() && FPOp->hasNoSignedZeros();
if (HasRequiredFMF)
return collectMinMaxFMF(FPOp);
@@ -260,7 +258,6 @@ hasRequiredFastMathFlags(FPMathOperator *FPOp, RecurKind &RK,
}
static RecurrenceDescriptor getMinMaxRecurrence(PHINode *Phi, Loop *TheLoop,
- FastMathFlags FuncFMF,
ScalarEvolution *SE) {
Type *Ty = Phi->getType();
BasicBlock *Latch = TheLoop->getLoopLatch();
@@ -321,8 +318,7 @@ static RecurrenceDescriptor getMinMaxRecurrence(PHINode *Phi, Loop *TheLoop,
RK = CurRK;
// Check required fast-math flags for FP recurrences.
if (RecurrenceDescriptor::isFPMinMaxRecurrenceKind(CurRK)) {
- auto CurFMF =
- hasRequiredFastMathFlags(cast<FPMathOperator>(Cur), RK, FuncFMF);
+ auto CurFMF = hasRequiredFastMathFlags(cast<FPMathOperator>(Cur), RK);
if (!CurFMF)
return {};
FMF &= *CurFMF;
@@ -437,9 +433,9 @@ static bool isFindLastLikePhi(PHINode *Phi, PHINode *HeaderPhi,
}
bool RecurrenceDescriptor::AddReductionVar(
- PHINode *Phi, RecurKind Kind, Loop *TheLoop, FastMathFlags FuncFMF,
- RecurrenceDescriptor &RedDes, DemandedBits *DB, AssumptionCache *AC,
- DominatorTree *DT, ScalarEvolution *SE) {
+ PHINode *Phi, RecurKind Kind, Loop *TheLoop, RecurrenceDescriptor &RedDes,
+ DemandedBits *DB, AssumptionCache *AC, DominatorTree *DT,
+ ScalarEvolution *SE) {
if (Phi->getNumIncomingValues() != 2)
return false;
@@ -610,8 +606,7 @@ bool RecurrenceDescriptor::AddReductionVar(
// the starting value (the Phi or an AND instruction if the Phi has been
// type-promoted).
if (Cur != Start) {
- ReduxDesc =
- isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF, SE);
+ ReduxDesc = isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, SE);
ExactFPMathInst = ExactFPMathInst == nullptr
? ReduxDesc.getExactFPMathInst()
: ExactFPMathInst;
@@ -985,9 +980,10 @@ RecurrenceDescriptor::isConditionalRdxPattern(Instruction *I) {
return InstDesc(true, I);
}
-RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
- Loop *L, PHINode *OrigPhi, Instruction *I, RecurKind Kind, InstDesc &Prev,
- FastMathFlags FuncFMF, ScalarEvolution *SE) {
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
+ Instruction *I, RecurKind Kind,
+ InstDesc &Prev, ScalarEvolution *SE) {
assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
switch (I->getOpcode()) {
default:
@@ -1055,51 +1051,37 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
DemandedBits *DB, AssumptionCache *AC,
DominatorTree *DT,
ScalarEvolution *SE) {
- BasicBlock *Header = TheLoop->getHeader();
- Function &F = *Header->getParent();
- FastMathFlags FMF;
- FMF.setNoNaNs(
- F.getFnAttribute("no-nans-fp-math").getValueAsBool());
- FMF.setNoSignedZeros(
- F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
-
- if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::Add, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::Sub, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a SUB reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, FMF, RedDes,
- DB, AC, DT, SE)) {
+ if (AddReductionVar(Phi, RecurKind::AddChainWithSubs, TheLoop, RedDes, DB, AC,
+ DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a chained ADD-SUB reduction PHI." << *Phi
<< "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::Or, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::And, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
return true;
}
- auto RD = getMinMaxRecurrence(Phi, TheLoop, FMF, SE);
+ auto RD = getMinMaxRecurrence(Phi, TheLoop, SE);
if (RD.getRecurrenceKind() != RecurKind::None) {
assert(
RecurrenceDescriptor::isMinMaxRecurrenceKind(RD.getRecurrenceKind()) &&
@@ -1108,28 +1090,25 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
RedDes = RD;
return true;
}
- if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::AnyOf, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found a conditional select reduction PHI." << *Phi
<< "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC,
- DT, SE)) {
+ if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, RedDes, DB, AC, DT,
+ SE)) {
LLVM_DEBUG(dbgs() << "Found a Find reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT,
- SE)) {
+ if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, RedDes, DB, AC, DT, SE)) {
LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
return true;
}
- if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC, DT,
+ if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, RedDes, DB, AC, DT,
SE)) {
LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index fb7890a3b82f4..c99a37d631c96 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -252,15 +252,15 @@ for.end:
; FMIN (FAST)
; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: @fmin_fast
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
; CHECK: %[[FCMP1:.*]] = fcmp fast olt <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp fast olt <vscale x 8 x float> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
+; CHECK: %[[SEL1:.*]] = select nnan nsz <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
+; CHECK: %[[SEL2:.*]] = select nnan nsz <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp fast olt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
@@ -274,7 +274,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp.i = fcmp fast olt float %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, float %0, float %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -286,15 +286,15 @@ for.end:
; FMAX (FAST)
; CHECK-REMARK: vectorized loop (vectorization width: vscale x 8, interleaved count: 2)
-define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: @fmax_fast
; CHECK: vector.body:
; CHECK: %[[LOAD1:.*]] = load <vscale x 8 x float>
; CHECK: %[[LOAD2:.*]] = load <vscale x 8 x float>
; CHECK: %[[FCMP1:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD1]]
; CHECK: %[[FCMP2:.*]] = fcmp fast ogt <vscale x 8 x float> %[[LOAD2]]
-; CHECK: %[[SEL1:.*]] = select <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
-; CHECK: %[[SEL2:.*]] = select <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
+; CHECK: %[[SEL1:.*]] = select nnan nsz <vscale x 8 x i1> %[[FCMP1]], <vscale x 8 x float> %[[LOAD1]]
+; CHECK: %[[SEL2:.*]] = select nnan nsz <vscale x 8 x i1> %[[FCMP2]], <vscale x 8 x float> %[[LOAD2]]
; CHECK: middle.block:
; CHECK: %[[FCMP:.*]] = fcmp fast ogt <vscale x 8 x float> %[[SEL1]], %[[SEL2]]
; CHECK-NEXT: %[[SEL:.*]] = select fast <vscale x 8 x i1> %[[FCMP]], <vscale x 8 x float> %[[SEL1]], <vscale x 8 x float> %[[SEL2]]
@@ -308,7 +308,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp.i = fcmp fast ogt float %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, float %0, float %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -451,7 +451,6 @@ for.end:
ret i32 %mul
}
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
!0 = distinct !{!0, !1, !2, !3, !4}
!1 = !{!"llvm.loop.vectorize.width", i32 8}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
index 671a929e6fa35..ac035ae7fc95a 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reductions.ll
@@ -594,9 +594,9 @@ for.end:
; FMIN (FAST)
-define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: define float @fmin_fast(
-; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4:[0-9]+]] {
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -608,8 +608,8 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
-; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan nsz olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8:%.*]] = select nnan nsz <vscale x 4 x i1> [[TMP2]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP8]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
@@ -617,7 +617,7 @@ define float @fmin_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP9]])
+; CHECK-NEXT: [[TMP12:%.*]] = call nnan nsz float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP9]])
; CHECK-NEXT: br label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret float [[TMP12]]
@@ -630,8 +630,8 @@ for.body:
%sum.07 = phi float [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
- %cmp.i = fcmp olt float %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
+ %cmp.i = fcmp nnan nsz olt float %0, %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, float %0, float %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body
@@ -640,9 +640,9 @@ for.end:
ret float %.sroa.speculated
}
-define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
+define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-LABEL: define half @fmin_fast_half_zvfhmin(
-; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5:[0-9]+]] {
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -654,8 +654,8 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x half> @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP6]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP14]])
-; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan nsz olt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8:%.*]] = select nnan nsz <vscale x 8 x i1> [[TMP2]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = call <vscale x 8 x half> @llvm.vp.merge.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[TMP8]], <vscale x 8 x half> [[VEC_PHI]], i32 [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
@@ -663,7 +663,7 @@ define half @fmin_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP12:%.*]] = call half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> [[TMP9]])
+; CHECK-NEXT: [[TMP12:%.*]] = call nnan nsz half @llvm.vector.reduce.fmin.nxv8f16(<vscale x 8 x half> [[TMP9]])
; CHECK-NEXT: br label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret half [[TMP12]]
@@ -676,8 +676,8 @@ for.body:
%sum.07 = phi half [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
%arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
%0 = load half, ptr %arrayidx, align 4
- %cmp.i = fcmp olt half %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
+ %cmp.i = fcmp nnan nsz olt half %0, %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, half %0, half %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body
@@ -686,9 +686,9 @@ for.end:
ret half %.sroa.speculated
}
-define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
+define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
; CHECK-LABEL: define bfloat @fmin_fast_bfloat_zvfbfmin(
-; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR6:[0-9]+]] {
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -700,8 +700,8 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr align 4 [[TMP6]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP14]])
-; CHECK-NEXT: [[TMP7:%.*]] = fcmp olt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP2:%.*]] = fcmp nnan nsz olt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8:%.*]] = select nnan nsz <vscale x 8 x i1> [[TMP2]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[VEC_PHI]], i32 [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
@@ -709,7 +709,7 @@ define bfloat @fmin_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP12:%.*]] = call bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> [[TMP9]])
+; CHECK-NEXT: [[TMP12:%.*]] = call nnan nsz bfloat @llvm.vector.reduce.fmin.nxv8bf16(<vscale x 8 x bfloat> [[TMP9]])
; CHECK-NEXT: br label %[[FOR_END:.*]]
; CHECK: [[FOR_END]]:
; CHECK-NEXT: ret bfloat [[TMP12]]
@@ -722,8 +722,8 @@ for.body:
%sum.07 = phi bfloat [ 0.000000e+00, %entry ], [ %.sroa.speculated, %for.body ]
%arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
%0 = load bfloat, ptr %arrayidx, align 4
- %cmp.i = fcmp olt bfloat %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
+ %cmp.i = fcmp nnan nsz olt bfloat %0, %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, bfloat %0, bfloat %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body
@@ -734,9 +734,9 @@ for.end:
; FMAX (FAST)
-define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
+define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) {
; CHECK-LABEL: define float @fmax_fast(
-; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4]] {
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -749,7 +749,7 @@ define float @fmax_fast(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP6]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP14]])
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8:%.*]] = select nnan nsz <vscale x 4 x i1> [[TMP7]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP8]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
@@ -771,7 +771,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp.i = fcmp fast ogt float %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, float %0, float %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, float %0, float %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body
@@ -780,9 +780,9 @@ for.end:
ret float %.sroa.speculated
}
-define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
+define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-LABEL: define half @fmax_fast_half_zvfhmin(
-; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5]] {
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -795,7 +795,7 @@ define half @fmax_fast_half_zvfhmin(ptr noalias nocapture readonly %a, i64 %n) #
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds half, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x half> @llvm.vp.load.nxv8f16.p0(ptr align 4 [[TMP6]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP14]])
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 8 x half> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8:%.*]] = select nnan nsz <vscale x 8 x i1> [[TMP7]], <vscale x 8 x half> [[WIDE_LOAD]], <vscale x 8 x half> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = call <vscale x 8 x half> @llvm.vp.merge.nxv8f16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x half> [[TMP8]], <vscale x 8 x half> [[VEC_PHI]], i32 [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
@@ -817,7 +817,7 @@ for.body:
%arrayidx = getelementptr inbounds half, ptr %a, i64 %iv
%0 = load half, ptr %arrayidx, align 4
%cmp.i = fcmp fast ogt half %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, half %0, half %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, half %0, half %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body
@@ -826,9 +826,9 @@ for.end:
ret half %.sroa.speculated
}
-define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #2 {
+define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64 %n) #1 {
; CHECK-LABEL: define bfloat @fmax_fast_bfloat_zvfbfmin(
-; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR6]] {
+; CHECK-SAME: ptr noalias readonly captures(none) [[A:%.*]], i64 [[N:%.*]]) #[[ATTR5]] {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
@@ -841,7 +841,7 @@ define bfloat @fmax_fast_bfloat_zvfbfmin(ptr noalias nocapture readonly %a, i64
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds bfloat, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = call <vscale x 8 x bfloat> @llvm.vp.load.nxv8bf16.p0(ptr align 4 [[TMP6]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP14]])
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast ogt <vscale x 8 x bfloat> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP8:%.*]] = select <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP8:%.*]] = select nnan nsz <vscale x 8 x i1> [[TMP7]], <vscale x 8 x bfloat> [[WIDE_LOAD]], <vscale x 8 x bfloat> [[VEC_PHI]]
; CHECK-NEXT: [[TMP9]] = call <vscale x 8 x bfloat> @llvm.vp.merge.nxv8bf16(<vscale x 8 x i1> splat (i1 true), <vscale x 8 x bfloat> [[TMP8]], <vscale x 8 x bfloat> [[VEC_PHI]], i32 [[TMP14]])
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP14]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP10]], [[INDEX]]
@@ -863,7 +863,7 @@ for.body:
%arrayidx = getelementptr inbounds bfloat, ptr %a, i64 %iv
%0 = load bfloat, ptr %arrayidx, align 4
%cmp.i = fcmp fast ogt bfloat %0, %sum.07
- %.sroa.speculated = select i1 %cmp.i, bfloat %0, bfloat %sum.07
+ %.sroa.speculated = select nnan nsz i1 %cmp.i, bfloat %0, bfloat %sum.07
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body
@@ -1255,6 +1255,5 @@ for.end:
declare float @llvm.fmuladd.f32(float, float, float)
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
-attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfhmin,+zvfhmin"}
-attributes #2 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "target-features"="+zfbfmin,+zvfbfmin"}
+attributes #0 = { "target-features"="+zfhmin,+zvfhmin"}
+attributes #1 = { "target-features"="+zfbfmin,+zvfbfmin"}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
index 77818580ccf50..35008d3eea092 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-inloop-reduction.ll
@@ -987,7 +987,7 @@ for.end:
ret float %mul
}
-define float @fmin(ptr %a, i64 %n, float %start) #0 {
+define float @fmin(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fmin(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
@@ -1049,7 +1049,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP11]], [[RDX]]
-; NO-VP-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP11]], float [[RDX]]
+; NO-VP-NEXT: [[MIN]] = select nnan nsz i1 [[CMP]], float [[TMP11]], float [[RDX]]
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -1066,7 +1066,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp = fcmp fast olt float %0, %rdx
- %min = select i1 %cmp, float %0, float %rdx
+ %min = select nnan nsz i1 %cmp, float %0, float %rdx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -1075,7 +1075,7 @@ for.end:
ret float %min
}
-define float @fmax(ptr %a, i64 %n, float %start) #0 {
+define float @fmax(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fmax(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
@@ -1137,7 +1137,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP11:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP11]], [[RDX]]
-; NO-VP-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP11]], float [[RDX]]
+; NO-VP-NEXT: [[MAX]] = select nnan nsz i1 [[CMP]], float [[TMP11]], float [[RDX]]
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
@@ -1154,7 +1154,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp = fcmp fast ogt float %0, %rdx
- %max = select i1 %cmp, float %0, float %rdx
+ %max = select nnan nsz i1 %cmp, float %0, float %rdx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -1661,7 +1661,6 @@ declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)
declare float @llvm.fmuladd.f32(float, float, float)
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
index 89d04f6423c58..6cb9a10856f05 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/tail-folding-reduction.ll
@@ -1028,7 +1028,7 @@ for.end:
ret float %mul
}
-define float @fmin(ptr %a, i64 %n, float %start) #0 {
+define float @fmin(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fmin(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
@@ -1044,7 +1044,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast olt <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]]
-; IF-EVL-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = select nnan nsz <vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
@@ -1077,7 +1077,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast olt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
-; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; NO-VP-NEXT: [[TMP10]] = select nnan nsz <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
@@ -1095,7 +1095,7 @@ define float @fmin(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast olt float [[TMP13]], [[RDX]]
-; NO-VP-NEXT: [[MIN]] = select i1 [[CMP]], float [[TMP13]], float [[RDX]]
+; NO-VP-NEXT: [[MIN]] = select nnan nsz i1 [[CMP]], float [[TMP13]], float [[RDX]]
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
@@ -1112,7 +1112,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp = fcmp fast olt float %0, %rdx
- %min = select i1 %cmp, float %0, float %rdx
+ %min = select nnan nsz i1 %cmp, float %0, float %rdx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -1121,7 +1121,7 @@ for.end:
ret float %min
}
-define float @fmax(ptr %a, i64 %n, float %start) #0 {
+define float @fmax(ptr %a, i64 %n, float %start) {
; IF-EVL-LABEL: @fmax(
; IF-EVL-NEXT: entry:
; IF-EVL-NEXT: br label [[VECTOR_PH:%.*]]
@@ -1137,7 +1137,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[EVL_BASED_IV]]
; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP11]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP13:%.*]] = fcmp fast ogt <vscale x 4 x float> [[VP_OP_LOAD]], [[VEC_PHI]]
-; IF-EVL-NEXT: [[TMP14:%.*]] = select <vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = select nnan nsz <vscale x 4 x i1> [[TMP13]], <vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
; IF-EVL-NEXT: [[TMP15]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP14]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP9]])
; IF-EVL-NEXT: [[TMP16:%.*]] = zext i32 [[TMP9]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
@@ -1170,7 +1170,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i64 [[INDEX]]
; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
; NO-VP-NEXT: [[TMP9:%.*]] = fcmp fast ogt <vscale x 4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
-; NO-VP-NEXT: [[TMP10]] = select <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
+; NO-VP-NEXT: [[TMP10]] = select nnan nsz <vscale x 4 x i1> [[TMP9]], <vscale x 4 x float> [[WIDE_LOAD]], <vscale x 4 x float> [[VEC_PHI]]
; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP3]]
; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
@@ -1188,7 +1188,7 @@ define float @fmax(ptr %a, i64 %n, float %start) #0 {
; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[IV]]
; NO-VP-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; NO-VP-NEXT: [[CMP:%.*]] = fcmp fast ogt float [[TMP13]], [[RDX]]
-; NO-VP-NEXT: [[MAX]] = select i1 [[CMP]], float [[TMP13]], float [[RDX]]
+; NO-VP-NEXT: [[MAX]] = select nnan nsz i1 [[CMP]], float [[TMP13]], float [[RDX]]
; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
@@ -1205,7 +1205,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %iv
%0 = load float, ptr %arrayidx, align 4
%cmp = fcmp fast ogt float %0, %rdx
- %max = select i1 %cmp, float %0, float %rdx
+ %max = select nnan nsz i1 %cmp, float %0, float %rdx
%iv.next = add nuw nsw i64 %iv, 1
%exitcond.not = icmp eq i64 %iv.next, %n
br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
@@ -1713,7 +1713,6 @@ declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)
declare float @llvm.fmuladd.f32(float, float, float)
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
index 3afdf947081b6..3352fb10f93f1 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/reduction-fastmath.ll
@@ -202,7 +202,7 @@ loop.exit:
; New instructions should have the same FMF as the original code.
; Note that the select inherits FMF from its fcmp condition.
-define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 {
+define float @PR35538(ptr nocapture readonly %a, i32 %N) {
; CHECK-LABEL: @PR35538(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP12:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -225,8 +225,8 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP7:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]]
-; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP6]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
-; CHECK-NEXT: [[TMP9]] = select <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP8]] = select nnan ninf nsz <4 x i1> [[TMP6]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP9]] = select nnan ninf nsz <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -252,7 +252,7 @@ define float @PR35538(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[CMP1_INV:%.*]] = fcmp nnan ninf nsz oge float [[TMP12]], [[MAX_013]]
-; CHECK-NEXT: [[MAX_0_]] = select i1 [[CMP1_INV]], float [[TMP12]], float [[MAX_013]]
+; CHECK-NEXT: [[MAX_0_]] = select nnan ninf nsz i1 [[CMP1_INV]], float [[TMP12]], float [[MAX_013]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -275,7 +275,7 @@ for.body:
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp1.inv = fcmp nnan ninf nsz oge float %0, %max.013
- %max.0. = select i1 %cmp1.inv, float %0, float %max.013
+ %max.0. = select nnan ninf nsz i1 %cmp1.inv, float %0, float %max.013
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.cond.cleanup, label %for.body
@@ -283,7 +283,7 @@ for.body:
; Same as above, but this time the select already has matching FMF with its condition.
-define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
+define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) {
; CHECK-LABEL: @PR35538_more_FMF(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[CMP12:%.*]] = icmp sgt i32 [[N:%.*]], 0
@@ -304,17 +304,17 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[TMP2]], i64 4
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x float>, ptr [[TMP5]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
-; CHECK-NEXT: [[TMP7:%.*]] = fcmp nnan ninf oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]]
-; CHECK-NEXT: [[TMP8]] = select nnan ninf <4 x i1> [[TMP6]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
-; CHECK-NEXT: [[TMP9]] = select nnan ninf <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP6:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP7:%.*]] = fcmp nnan ninf nsz oge <4 x float> [[WIDE_LOAD2]], [[VEC_PHI1]]
+; CHECK-NEXT: [[TMP8]] = select nnan ninf nsz <4 x i1> [[TMP6]], <4 x float> [[WIDE_LOAD]], <4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP9]] = select nnan ninf nsz <4 x i1> [[TMP7]], <4 x float> [[WIDE_LOAD2]], <4 x float> [[VEC_PHI1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf ogt <4 x float> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = call nnan ninf float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]])
+; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <4 x float> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP8]], <4 x float> [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = call nnan ninf nsz float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -332,8 +332,8 @@ define float @PR35538_more_FMF(ptr nocapture readonly %a, i32 %N) #0 {
; CHECK-NEXT: [[MAX_013:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MAX_0_]], [[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP12:%.*]] = load float, ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[CMP1_INV:%.*]] = fcmp nnan ninf oge float [[TMP12]], [[MAX_013]]
-; CHECK-NEXT: [[MAX_0_]] = select nnan ninf i1 [[CMP1_INV]], float [[TMP12]], float [[MAX_013]]
+; CHECK-NEXT: [[CMP1_INV:%.*]] = fcmp nnan ninf nsz oge float [[TMP12]], [[MAX_013]]
+; CHECK-NEXT: [[MAX_0_]] = select nnan ninf nsz i1 [[CMP1_INV]], float [[TMP12]], float [[MAX_013]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -355,11 +355,10 @@ for.body:
%max.013 = phi float [ -1.000000e+00, %for.body.lr.ph ], [ %max.0., %for.body ]
%arrayidx = getelementptr inbounds float, ptr %a, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
- %cmp1.inv = fcmp nnan ninf oge float %0, %max.013
- %max.0. = select nnan ninf i1 %cmp1.inv, float %0, float %max.013
+ %cmp1.inv = fcmp nnan ninf nsz oge float %0, %max.013
+ %max.0. = select nnan ninf nsz i1 %cmp1.inv, float %0, float %max.013
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
-attributes #0 = { "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
index 555e695cfa935..2a997060b3083 100644
--- a/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-minmax-instruction-flag.ll
@@ -42,33 +42,26 @@ out: ; preds = %loop
ret float %t6
}
-; Check if vectorization is still enabled by function attribute.
+; Function attributes are not used for FP min/max vectorization.
define float @minloopattr(ptr nocapture readonly %arg) #0 {
; CHECK-LABEL: @minloopattr(
; CHECK-NEXT: top:
; CHECK-NEXT: [[T:%.*]] = load float, ptr [[ARG:%.*]], align 4
-; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i64 0
-; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[ARG]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP1]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fcmp olt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: middle.block:
-; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP4]])
; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
+; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
+; CHECK-NEXT: [[T3:%.*]] = getelementptr float, ptr [[ARG]], i64 [[T1]]
+; CHECK-NEXT: [[T4:%.*]] = load float, ptr [[T3]], align 4
+; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
+; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
+; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
+; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
+; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
; CHECK: out:
-; CHECK-NEXT: ret float [[TMP6]]
+; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
+; CHECK-NEXT: ret float [[T6_LCSSA]]
;
top:
%t = load float, ptr %arg
@@ -129,8 +122,8 @@ out: ; preds = %loop
ret float %t6
}
-; This test is checking that we don't vectorize when only one of the required attributes is set.
-; Note that this test should not vectorize even after switching to IR-level FMF.
+; This test is checking that we don't vectorize when only one of the required flags is set.
+
define float @minloopmissingnsz(ptr nocapture readonly %arg) #1 {
; CHECK-LABEL: @minloopmissingnsz(
; CHECK-NEXT: top:
diff --git a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
index e7ab02cd98a5e..78d14dfa0cee2 100644
--- a/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/minmax_reduction.ll
@@ -401,11 +401,11 @@ for.end:
; Turn this into a max reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @max_red_float(
; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @max_red_float(float %max) #0 {
+define float @max_red_float(float %max) {
entry:
br label %for.body
@@ -415,7 +415,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ogt float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+ %max.red.0 = select nnan nsz i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -426,11 +426,11 @@ for.end:
; CHECK-LABEL: @max_red_float_ge(
; CHECK: fcmp fast oge <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @max_red_float_ge(float %max) #0 {
+define float @max_red_float_ge(float %max) {
entry:
br label %for.body
@@ -440,7 +440,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast oge float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+ %max.red.0 = select nnan nsz i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -451,11 +451,11 @@ for.end:
; CHECK-LABEL: @inverted_max_red_float(
; CHECK: fcmp fast olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @inverted_max_red_float(float %max) #0 {
+define float @inverted_max_red_float(float %max) {
entry:
br label %for.body
@@ -465,7 +465,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast olt float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+ %max.red.0 = select nnan nsz i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -476,11 +476,11 @@ for.end:
; CHECK-LABEL: @inverted_max_red_float_le(
; CHECK: fcmp fast ole <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @inverted_max_red_float_le(float %max) #0 {
+define float @inverted_max_red_float_le(float %max) {
entry:
br label %for.body
@@ -490,7 +490,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ole float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+ %max.red.0 = select nnan nsz i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -501,11 +501,11 @@ for.end:
; CHECK-LABEL: @unordered_max_red_float(
; CHECK: fcmp fast ugt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @unordered_max_red_float(float %max) #0 {
+define float @unordered_max_red_float(float %max) {
entry:
br label %for.body
@@ -515,7 +515,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ugt float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+ %max.red.0 = select nnan nsz i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -526,11 +526,11 @@ for.end:
; CHECK-LABEL: @unordered_max_red_float_ge(
; CHECK: fcmp fast uge <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @unordered_max_red_float_ge(float %max) #0 {
+define float @unordered_max_red_float_ge(float %max) {
entry:
br label %for.body
@@ -540,7 +540,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast uge float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+ %max.red.0 = select nnan nsz i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -551,11 +551,11 @@ for.end:
; CHECK-LABEL: @inverted_unordered_max_red_float(
; CHECK: fcmp fast ult <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @inverted_unordered_max_red_float(float %max) #0 {
+define float @inverted_unordered_max_red_float(float %max) {
entry:
br label %for.body
@@ -565,7 +565,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ult float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+ %max.red.0 = select nnan nsz i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -576,11 +576,11 @@ for.end:
; CHECK-LABEL: @inverted_unordered_max_red_float_le(
; CHECK: fcmp fast ule <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmax.v2f32
-define float @inverted_unordered_max_red_float_le(float %max) #0 {
+define float @inverted_unordered_max_red_float_le(float %max) {
entry:
br label %for.body
@@ -590,7 +590,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ule float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %max.red.08, float %0
+ %max.red.0 = select nnan nsz i1 %cmp3, float %max.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -604,11 +604,11 @@ for.end:
; Turn this into a min reduction in the presence of a no-nans-fp-math attribute.
; CHECK-LABEL: @min_red_float(
; CHECK: fcmp fast olt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @min_red_float(float %min) #0 {
+define float @min_red_float(float %min) {
entry:
br label %for.body
@@ -618,7 +618,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast olt float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+ %min.red.0 = select nnan nsz i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -629,11 +629,11 @@ for.end:
; CHECK-LABEL: @min_red_float_le(
; CHECK: fcmp fast ole <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @min_red_float_le(float %min) #0 {
+define float @min_red_float_le(float %min) {
entry:
br label %for.body
@@ -643,7 +643,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ole float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+ %min.red.0 = select nnan nsz i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -654,11 +654,11 @@ for.end:
; CHECK-LABEL: @inverted_min_red_float(
; CHECK: fcmp fast ogt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @inverted_min_red_float(float %min) #0 {
+define float @inverted_min_red_float(float %min) {
entry:
br label %for.body
@@ -668,7 +668,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ogt float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+ %min.red.0 = select nnan nsz i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -679,11 +679,11 @@ for.end:
; CHECK-LABEL: @inverted_min_red_float_ge(
; CHECK: fcmp fast oge <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @inverted_min_red_float_ge(float %min) #0 {
+define float @inverted_min_red_float_ge(float %min) {
entry:
br label %for.body
@@ -693,7 +693,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast oge float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+ %min.red.0 = select nnan nsz i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -704,11 +704,11 @@ for.end:
; CHECK-LABEL: @unordered_min_red_float(
; CHECK: fcmp fast ult <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @unordered_min_red_float(float %min) #0 {
+define float @unordered_min_red_float(float %min) {
entry:
br label %for.body
@@ -718,7 +718,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ult float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+ %min.red.0 = select nnan nsz i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -729,11 +729,11 @@ for.end:
; CHECK-LABEL: @unordered_min_red_float_le(
; CHECK: fcmp fast ule <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @unordered_min_red_float_le(float %min) #0 {
+define float @unordered_min_red_float_le(float %min) {
entry:
br label %for.body
@@ -743,7 +743,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ule float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %0, float %min.red.08
+ %min.red.0 = select nnan nsz i1 %cmp3, float %0, float %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -754,11 +754,11 @@ for.end:
; CHECK-LABEL: @inverted_unordered_min_red_float(
; CHECK: fcmp fast ugt <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @inverted_unordered_min_red_float(float %min) #0 {
+define float @inverted_unordered_min_red_float(float %min) {
entry:
br label %for.body
@@ -768,7 +768,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ugt float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+ %min.red.0 = select nnan nsz i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -779,11 +779,11 @@ for.end:
; CHECK-LABEL: @inverted_unordered_min_red_float_ge(
; CHECK: fcmp fast uge <2 x float>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast float @llvm.vector.reduce.fmin.v2f32
-define float @inverted_unordered_min_red_float_ge(float %min) #0 {
+define float @inverted_unordered_min_red_float_ge(float %min) {
entry:
br label %for.body
@@ -793,7 +793,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast uge float %0, %min.red.08
- %min.red.0 = select i1 %cmp3, float %min.red.08, float %0
+ %min.red.0 = select nnan nsz i1 %cmp3, float %min.red.08, float %0
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -805,11 +805,11 @@ for.end:
; Make sure we handle doubles, too.
; CHECK-LABEL: @min_red_double(
; CHECK: fcmp fast olt <2 x double>
-; CHECK: select <2 x i1>
+; CHECK: select nnan nsz <2 x i1>
; CHECK: middle.block
; CHECK: call fast double @llvm.vector.reduce.fmin.v2f64
-define double @min_red_double(double %min) #0 {
+define double @min_red_double(double %min) {
entry:
br label %for.body
@@ -819,7 +819,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x double], ptr @dA, i64 0, i64 %indvars.iv
%0 = load double, ptr %arrayidx, align 4
%cmp3 = fcmp fast olt double %0, %min.red.08
- %min.red.0 = select i1 %cmp3, double %0, double %min.red.08
+ %min.red.0 = select nnan nsz i1 %cmp3, double %0, double %min.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -829,7 +829,7 @@ for.end:
}
-; Don't this into a max reduction. The no-nans-fp-math attribute is missing
+; Don't turn this into a max reduction. The nnan flag is missing on the select.
; CHECK-LABEL: @max_red_float_nans(
; CHECK-NOT: <2 x float>
@@ -852,11 +852,11 @@ for.end:
ret float %max.red.0
}
-; As above, with the no-signed-zeros-fp-math attribute missing
+; As above, with the nsz flag missing on the select.
; CHECK-LABEL: @max_red_float_nsz(
; CHECK-NOT: <2 x float>
-define float @max_red_float_nsz(float %max) #1 {
+define float @max_red_float_nsz(float %max) {
entry:
br label %for.body
@@ -866,7 +866,7 @@ for.body:
%arrayidx = getelementptr inbounds [1024 x float], ptr @fA, i64 0, i64 %indvars.iv
%0 = load float, ptr %arrayidx, align 4
%cmp3 = fcmp fast ogt float %0, %max.red.08
- %max.red.0 = select i1 %cmp3, float %0, float %max.red.08
+ %max.red.0 = select nnan i1 %cmp3, float %0, float %max.red.08
%indvars.iv.next = add i64 %indvars.iv, 1
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.end, label %for.body
@@ -1220,5 +1220,3 @@ declare float @llvm.maxnum.f32(float, float)
declare float @llvm.minimum.f32(float, float)
declare float @llvm.maximum.f32(float, float)
-attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
-attributes #1 = { "no-nans-fp-math"="true" }
More information about the llvm-commits
mailing list