[llvm] 1bee549 - [LoopVectorize] propagate fast-math-flags from induction instructions
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 4 14:21:50 PST 2021
Author: Sanjay Patel
Date: 2021-03-04T17:21:32-05:00
New Revision: 1bee549737ace3a96288d8b16768aec5899fbb91
URL: https://github.com/llvm/llvm-project/commit/1bee549737ace3a96288d8b16768aec5899fbb91
DIFF: https://github.com/llvm/llvm-project/commit/1bee549737ace3a96288d8b16768aec5899fbb91.diff
LOG: [LoopVectorize] propagate fast-math-flags from induction instructions
This code assumed that FP math was only permissable if it was
fully "fast", so it hard-coded "fast" when creating new instructions.
The underlying code already allows matching recurrences/reductions
that are only "reassoc", so this change should prevent the potential
miscompile seen in the test diffs (we created "fast" ops even though
none existed in the original code).
I don't know if we need to create the temporary IRBuilder objects
used here, so that could be follow-up clean-up.
There's an open question about whether we should require "nsz" in
addition to "reassoc" here. InstCombine uses that combo for its
reassociative folds, but I think codegen is not as strict.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/float-induction.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 007888e3a7f4..b18aa6c4865e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -395,13 +395,6 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL, ElementCount VF) {
/// we always assume predicated blocks have a 50% chance of executing.
static unsigned getReciprocalPredBlockProb() { return 2; }
-/// A helper function that adds a 'fast' flag to floating-point operations.
-static Value *addFastMathFlag(Value *V) {
- if (isa<FPMathOperator>(V))
- cast<Instruction>(V)->setFastMathFlags(FastMathFlags::getFast());
- return V;
-}
-
/// A helper function that returns an integer or floating-point constant with
/// value C.
static Constant *getSignedIntOrFpConstant(Type *Ty, int64_t C) {
@@ -2247,7 +2240,7 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
// floating-point arithmetic as appropriate.
Value *ConstVF =
getSignedIntOrFpConstant(Step->getType(), VF.getKnownMinValue());
- Value *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, Step, ConstVF));
+ Value *Mul = Builder.CreateBinOp(MulOp, Step, ConstVF);
// Create a vector splat to use in the induction update.
//
@@ -2274,8 +2267,8 @@ void InnerLoopVectorizer::createVectorIntOrFpInductionPHI(
recordVectorLoopValueForInductionCast(II, EntryVal, LastInduction, CastDef,
State, Part);
- LastInduction = cast<Instruction>(addFastMathFlag(
- Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add")));
+ LastInduction = cast<Instruction>(
+ Builder.CreateBinOp(AddOp, LastInduction, SplatVF, "step.add"));
LastInduction->setDebugLoc(EntryVal->getDebugLoc());
}
@@ -2407,6 +2400,11 @@ void InnerLoopVectorizer::widenIntOrFpInduction(PHINode *IV, Value *Start,
}
};
+ // Fast-math-flags propagate from the original induction instruction.
+ IRBuilder<>::FastMathFlagGuard FMFG(Builder);
+ if (ID.getInductionBinOp() && isa<FPMathOperator>(ID.getInductionBinOp()))
+ Builder.setFastMathFlags(ID.getInductionBinOp()->getFastMathFlags());
+
// Now do the actual transformations, and start with creating the step value.
Value *Step = CreateStepValue(ID.getStep());
if (VF.isZero() || VF.isScalar()) {
@@ -2486,23 +2484,11 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));
// Add the consecutive indices to the vector value.
+ // Floating-point operations inherit FMF via the builder's flags.
Constant *Cv = ConstantVector::get(Indices);
-
Step = Builder.CreateVectorSplat(VLen, Step);
-
- // Floating point operations had to be 'fast' to enable the induction.
- FastMathFlags Flags;
- Flags.setFast();
-
Value *MulOp = Builder.CreateFMul(Cv, Step);
- if (isa<Instruction>(MulOp))
- // Have to check, MulOp may be a constant
- cast<Instruction>(MulOp)->setFastMathFlags(Flags);
-
- Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
- if (isa<Instruction>(BOp))
- cast<Instruction>(BOp)->setFastMathFlags(Flags);
- return BOp;
+ return Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
}
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -2547,15 +2533,15 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
createStepForVF(Builder, ConstantInt::get(IntStepTy, Part), VF);
if (ScalarIVTy->isFloatingPointTy())
StartIdx = Builder.CreateSIToFP(StartIdx, ScalarIVTy);
- StartIdx = addFastMathFlag(Builder.CreateBinOp(
- AddOp, StartIdx, getSignedIntOrFpConstant(ScalarIVTy, Lane)));
+ StartIdx = Builder.CreateBinOp(
+ AddOp, StartIdx, getSignedIntOrFpConstant(ScalarIVTy, Lane));
// The step returned by `createStepForVF` is a runtime-evaluated value
// when VF is scalable. Otherwise, it should be folded into a Constant.
assert((VF.isScalable() || isa<Constant>(StartIdx)) &&
"Expected StartIdx to be folded to a constant when VF is not "
"scalable");
- auto *Mul = addFastMathFlag(Builder.CreateBinOp(MulOp, StartIdx, Step));
- auto *Add = addFastMathFlag(Builder.CreateBinOp(AddOp, ScalarIV, Mul));
+ auto *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step);
+ auto *Add = Builder.CreateBinOp(AddOp, ScalarIV, Mul);
State.set(Def, Add, VPIteration(Part, Lane));
recordVectorLoopValueForInductionCast(ID, EntryVal, Add, CastDef, State,
Part, Lane);
@@ -3325,6 +3311,7 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
return LoopVectorBody->getTerminator();
return &*B.GetInsertPoint();
};
+
switch (ID.getKind()) {
case InductionDescriptor::IK_IntInduction: {
assert(Index->getType() == StartValue->getType() &&
@@ -3352,22 +3339,9 @@ Value *InnerLoopVectorizer::emitTransformedIndex(
"Original bin op should be defined for FP induction");
Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
-
- // Floating point operations had to be 'fast' to enable the induction.
- FastMathFlags Flags;
- Flags.setFast();
-
Value *MulExp = B.CreateFMul(StepValue, Index);
- if (isa<Instruction>(MulExp))
- // We have to check, the MulExp may be a constant.
- cast<Instruction>(MulExp)->setFastMathFlags(Flags);
-
- Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
- "induction");
- if (isa<Instruction>(BOp))
- cast<Instruction>(BOp)->setFastMathFlags(Flags);
-
- return BOp;
+ return B.CreateBinOp(InductionBinOp->getOpcode(), StartValue, MulExp,
+ "induction");
}
case InductionDescriptor::IK_NoInduction:
return nullptr;
@@ -3454,6 +3428,11 @@ void InnerLoopVectorizer::createInductionResumeValues(
EndValue = VectorTripCount;
} else {
IRBuilder<> B(L->getLoopPreheader()->getTerminator());
+
+ // Fast-math-flags propagate from the original induction instruction.
+ if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
+ B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
+
Type *StepType = II.getStep()->getType();
Instruction::CastOps CastOp =
CastInst::getCastOpcode(VectorTripCount, true, StepType, true);
@@ -3675,6 +3654,11 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
assert(isa<PHINode>(UI) && "Expected LCSSA form");
IRBuilder<> B(MiddleBlock->getTerminator());
+
+ // Fast-math-flags propagate from the original induction instruction.
+ if (II.getInductionBinOp() && isa<FPMathOperator>(II.getInductionBinOp()))
+ B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
+
Value *CountMinusOne = B.CreateSub(
CountRoundDown, ConstantInt::get(CountRoundDown->getType(), 1));
Value *CMO =
@@ -7889,9 +7873,9 @@ Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
if (Ty->isFloatingPointTy()) {
Constant *C = ConstantFP::get(Ty, (double)StartIdx);
- // Floating point operations had to be 'fast' to enable the unrolling.
- Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step));
- return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp));
+ // Floating-point operations inherit FMF via the builder's flags.
+ Value *MulOp = Builder.CreateFMul(C, Step);
+ return Builder.CreateBinOp(BinOp, Val, MulOp);
}
Constant *C = ConstantInt::get(Ty, StartIdx);
return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
diff --git a/llvm/test/Transforms/LoopVectorize/float-induction.ll b/llvm/test/Transforms/LoopVectorize/float-induction.ll
index c3215bcbb1a1..291c01efc3ab 100644
--- a/llvm/test/Transforms/LoopVectorize/float-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/float-induction.ll
@@ -105,8 +105,8 @@ for.end: ; preds = %for.end.loopexit, %
ret void
}
-; FIXME: We do not need the full 'fast' FMF to vectorize the loop, but the code can't become
-; 'fast' spontaneously. Something is wrong with FMF expectations/propagation.
+; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become
+; 'fast' spontaneously - FMF should propagate from the original IR.
define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i32 %N) {
; VEC4_INTERL1-LABEL: @fp_iv_loop1_reassoc_FMF(
@@ -123,15 +123,15 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
; VEC4_INTERL1: vector.ph:
; VEC4_INTERL1-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; VEC4_INTERL1-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
-; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
-; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
+; VEC4_INTERL1-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
+; VEC4_INTERL1-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
+; VEC4_INTERL1-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL1-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]]
+; VEC4_INTERL1-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
; VEC4_INTERL1-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0
; VEC4_INTERL1-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL1-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -142,7 +142,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
; VEC4_INTERL1-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
; VEC4_INTERL1-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
; VEC4_INTERL1-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
+; VEC4_INTERL1-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
; VEC4_INTERL1-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL1-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; VEC4_INTERL1: middle.block:
@@ -181,22 +181,22 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
; VEC4_INTERL2: vector.ph:
; VEC4_INTERL2-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
; VEC4_INTERL2-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
-; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
-; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
+; VEC4_INTERL2-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
+; VEC4_INTERL2-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
-; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
-; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
+; VEC4_INTERL2-NEXT: [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
+; VEC4_INTERL2-NEXT: [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]]
+; VEC4_INTERL2-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
; VEC4_INTERL2-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i32 0
; VEC4_INTERL2-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
; VEC4_INTERL2-NEXT: br label [[VECTOR_BODY:%.*]]
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VEC4_INTERL2-NEXT: [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
+; VEC4_INTERL2-NEXT: [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
; VEC4_INTERL2-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; VEC4_INTERL2-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
; VEC4_INTERL2-NEXT: store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
@@ -204,7 +204,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
; VEC4_INTERL2-NEXT: [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
; VEC4_INTERL2-NEXT: store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4
; VEC4_INTERL2-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
-; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
+; VEC4_INTERL2-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
; VEC4_INTERL2-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC4_INTERL2-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; VEC4_INTERL2: middle.block:
@@ -265,15 +265,15 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
; VEC2_INTERL1_PRED_STORE: vector.ph:
; VEC2_INTERL1_PRED_STORE-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
; VEC2_INTERL1_PRED_STORE-NEXT: [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
-; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP4]]
-; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP4:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
+; VEC2_INTERL1_PRED_STORE-NEXT: [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP4]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i32 0
; VEC2_INTERL1_PRED_STORE-NEXT: [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
; VEC2_INTERL1_PRED_STORE-NEXT: br label [[VECTOR_BODY:%.*]]
@@ -284,7 +284,7 @@ define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
; VEC2_INTERL1_PRED_STORE-NEXT: store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4
; VEC2_INTERL1_PRED_STORE-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
-; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
+; VEC2_INTERL1_PRED_STORE-NEXT: [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
; VEC2_INTERL1_PRED_STORE-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VEC2_INTERL1_PRED_STORE-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
; VEC2_INTERL1_PRED_STORE: middle.block:
More information about the llvm-commits
mailing list