[llvm] 440a8ad - [VPlan] Use VPIRFlags to manage FMFs for ComputeReductionResult (NFC).
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed May 28 12:55:08 PDT 2025
Author: Florian Hahn
Date: 2025-05-28T20:54:58+01:00
New Revision: 440a8adb86564846e10830f56a9073e43d834c9b
URL: https://github.com/llvm/llvm-project/commit/440a8adb86564846e10830f56a9073e43d834c9b
DIFF: https://github.com/llvm/llvm-project/commit/440a8adb86564846e10830f56a9073e43d834c9b.diff
LOG: [VPlan] Use VPIRFlags to manage FMFs for ComputeReductionResult (NFC).
Manage fast-math flags using VPIRFlags from VPInstruciton, in inline
with other VPInstructions. With this change, we now print the correctly
flags for ComputeReductionResult, other than that NFC.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2fe59a464457f..8040d375f0dbd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9576,8 +9576,13 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
Builder.createNaryOp(VPInstruction::ComputeFindLastIVResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
} else {
- FinalReductionResult = Builder.createNaryOp(
- VPInstruction::ComputeReductionResult, {PhiR, NewExitingVPV}, ExitDL);
+ VPIRFlags Flags = RecurrenceDescriptor::isFloatingPointRecurrenceKind(
+ RdxDesc.getRecurrenceKind())
+ ? VPIRFlags(RdxDesc.getFastMathFlags())
+ : VPIRFlags();
+ FinalReductionResult =
+ Builder.createNaryOp(VPInstruction::ComputeReductionResult,
+ {PhiR, NewExitingVPV}, Flags, ExitDL);
}
// Update all users outside the vector region.
OrigExitingVPV->replaceUsesWithIf(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 72aa771b0b98e..5fbf79085cb74 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -648,6 +648,10 @@ Value *VPInstruction::generate(VPTransformState &State) {
for (unsigned Part = 0; Part < UF; ++Part)
RdxParts[Part] = State.get(getOperand(1 + Part), PhiR->isInLoop());
+ IRBuilderBase::FastMathFlagGuard FMFG(Builder);
+ if (hasFastMathFlags())
+ Builder.setFastMathFlags(getFastMathFlags());
+
// If the vector reduction can be performed in a smaller type, we truncate
// then extend the loop exit value to enable InstCombine to evaluate the
// entire expression in the smaller type.
@@ -663,8 +667,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
ReducedPartRdx = RdxParts[UF - 1];
} else {
// Floating-point operations should have some FMF to enable the reduction.
- IRBuilderBase::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
for (unsigned Part = 1; Part < UF; ++Part) {
Value *RdxPart = RdxParts[Part];
if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))
@@ -684,9 +686,6 @@ Value *VPInstruction::generate(VPTransformState &State) {
// TODO: Support in-order reductions based on the recurrence descriptor.
// All ops in the reduction inherit fast-math-flags from the recurrence
// descriptor.
- IRBuilderBase::FastMathFlagGuard FMFG(Builder);
- Builder.setFastMathFlags(RdxDesc.getFastMathFlags());
-
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(RK))
ReducedPartRdx =
createAnyOfReduction(Builder, ReducedPartRdx, RdxDesc, OrigPhi);
@@ -1599,7 +1598,8 @@ bool VPIRFlags::flagsValidForOpcode(unsigned Opcode) const {
Opcode == Instruction::FSub || Opcode == Instruction::FNeg ||
Opcode == Instruction::FDiv || Opcode == Instruction::FRem ||
Opcode == Instruction::FCmp || Opcode == Instruction::Select ||
- Opcode == VPInstruction::WideIVStep;
+ Opcode == VPInstruction::WideIVStep ||
+ Opcode == VPInstruction::ComputeReductionResult;
case OperationType::NonNegOp:
return Opcode == Instruction::ZExt;
break;
diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
index 6804c16147c2e..7d45b66f0ad37 100644
--- a/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/vplan-printing-reductions.ll
@@ -34,7 +34,7 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next>
; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-last-element vp<[[RED_RES]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
@@ -102,7 +102,7 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:.+]]> = compute-reduction-result ir<%red>, ir<%red.next>
+; CHECK-NEXT: EMIT vp<[[RED_RES:.+]]> = compute-reduction-result fast ir<%red>, ir<%red.next>
; CHECK-NEXT: CLONE store vp<[[RED_RES]]>, ir<%dst>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
@@ -175,7 +175,7 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: Successor(s): middle.block
; CHECK-EMPTY:
; CHECK-NEXT: middle.block:
-; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result ir<%sum.07>, ir<[[MULADD]]>
+; CHECK-NEXT: EMIT vp<[[RED_RES:%.+]]> = compute-reduction-result nnan ninf nsz ir<%sum.07>, ir<[[MULADD]]>
; CHECK-NEXT: EMIT vp<[[RED_EX:%.+]]> = extract-last-element vp<[[RED_RES]]>
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<%n>, vp<[[VTC]]>
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
More information about the llvm-commits
mailing list