[llvm] [RFC][llvm] Added llvm.loop.vectorize.reassociate_fpreductions.enable metadata. (PR #141685)
Slava Zakharin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 11 11:48:20 PDT 2025
https://github.com/vzakhari updated https://github.com/llvm/llvm-project/pull/141685
>From 1619ad67557dc0495fdcd2e5b8be84b51b80df91 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 27 May 2025 15:58:17 -0700
Subject: [PATCH 1/5] [RFC][llvm] Added
llvm.loop.vectorize.reassociation.enable metadata.
This metadata allows unsafe reassociation of computations during
loop vectorization. For example, it allows vectorizing loops
with floating-point reductions without the need to compile the whole
function/program with -fassociative-math.
---
llvm/docs/LangRef.rst | 16 +++++++
.../Vectorize/LoopVectorizationLegality.h | 14 +++++-
.../Vectorize/LoopVectorizationLegality.cpp | 8 +++-
.../LoopVectorize/reduction-reassociate.ll | 47 +++++++++++++++++++
4 files changed, 82 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 6a4bf6e594d14..b0f42bafd85c1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7593,6 +7593,22 @@ Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving
multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0
then the interleave count will be determined automatically.
+'``llvm.loop.vectorize.reassociation.enable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+This metadata selectively allows or disallows reassociating computations,
+which otherwise may be unsafe to reassociate, during the loop vectorization.
+For example, a floating point ``ADD`` reduction without ``reassoc`` fast-math
+flags may be vectorized provided that this metadata allows it. The first
+operand is the string ``llvm.loop.vectorize.reassociation.enable``
+and the second operand is a bit. If the bit operand value is 1 unsafe
+reassociations aqre enabled. A value of 0 disables unsafe reassociations.
+
+.. code-block:: llvm
+
+ !0 = !{!"llvm.loop.vectorize.reassociation.enable", i1 0}
+ !1 = !{!"llvm.loop.vectorize.reassociation.enable", i1 1}
+
'``llvm.loop.vectorize.enable``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index d654ac3ec9273..fb91eb022daf6 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -64,7 +64,8 @@ class LoopVectorizeHints {
HK_FORCE,
HK_ISVECTORIZED,
HK_PREDICATE,
- HK_SCALABLE
+ HK_SCALABLE,
+ HK_REASSOCIATE,
};
/// Hint - associates name and validation with the hint value.
@@ -97,6 +98,10 @@ class LoopVectorizeHints {
/// Says whether we should use fixed width or scalable vectorization.
Hint Scalable;
+ /// Says whether unsafe reassociation of computations is allowed
+ /// during the loop vectorization.
+ Hint Reassociate;
+
/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }
@@ -162,6 +167,13 @@ class LoopVectorizeHints {
return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
}
+ enum ForceKind getReassociate() const {
+ if ((ForceKind)Reassociate.Value == FK_Undefined &&
+ hasDisableAllTransformsHint(TheLoop))
+ return FK_Disabled;
+ return (ForceKind)Reassociate.Value;
+ }
+
/// If hints are provided that force vectorization, use the AlwaysPrint
/// pass name to force the frontend to print the diagnostic.
const char *vectorizeAnalysisPassName() const;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 8e09e6f8d4935..ec3194f754664 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -97,6 +97,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
case HK_ISVECTORIZED:
case HK_PREDICATE:
case HK_SCALABLE:
+ case HK_REASSOCIATE:
return (Val == 0 || Val == 1);
}
return false;
@@ -112,6 +113,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
+ Reassociate("vectorize.reassociation.enable", FK_Undefined,
+ HK_REASSOCIATE),
TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -251,6 +254,7 @@ bool LoopVectorizeHints::allowReordering() const {
ElementCount EC = getWidth();
return HintsAllowReordering &&
(getForce() == LoopVectorizeHints::FK_Enabled ||
+ getReassociate() == LoopVectorizeHints::FK_Enabled ||
EC.getKnownMinValue() > 1);
}
@@ -300,8 +304,8 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force,
- &IsVectorized, &Predicate, &Scalable};
+ Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized,
+ &Predicate, &Scalable, &Reassociate};
for (auto *H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
new file mode 100644
index 0000000000000..ffe69596545a9
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
@@ -0,0 +1,47 @@
+; Check that the loop with a floating-point reduction is vectorized
+; due to llvm.loop.vectorize.reassociation.enable metadata.
+; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s
+
+source_filename = "FIRModule"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
+define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 {
+; CHECK-LABEL: define void @test_(
+; CHECK-NEXT: fadd contract <4 x float> {{.*}}
+; CHECK-NEXT: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}})
+;
+ %invariant.gep = getelementptr i8, ptr %1, i64 -4
+ %.promoted = load float, ptr %0, align 4
+ br label %3
+
+3: ; preds = %2, %3
+ %indvars.iv = phi i64 [ 1, %2 ], [ %indvars.iv.next, %3 ]
+ %4 = phi float [ %.promoted, %2 ], [ %6, %3 ]
+ %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
+ %5 = load float, ptr %gep, align 4
+ %6 = fadd contract float %4, %5
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond.not = icmp eq i64 %indvars.iv.next, 1001
+ br i1 %exitcond.not, label %7, label %3, !llvm.loop !2
+
+7: ; preds = %3
+ %.lcssa = phi float [ %6, %3 ]
+ store float %.lcssa, ptr %0, align 4
+ ret void
+}
+
+attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "target-cpu"="x86-64" }
+
+!llvm.ident = !{!0}
+!llvm.module.flags = !{!1}
+
+!0 = !{!"flang version 21.0.0"}
+!1 = !{i32 2, !"Debug Info Version", i32 3}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.vectorize.reassociation.enable", i1 true}
+
+; CHECK-NOT: llvm.loop.vectorize.reassociation.enable
+; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
>From 9511b6e2e10ce539519e9a7f446ccd0f7dd39d84 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 27 May 2025 17:41:20 -0700
Subject: [PATCH 2/5] Fixed test.
---
.../Transforms/LoopVectorize/reduction-reassociate.ll | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
index ffe69596545a9..e35ad858b8d89 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
@@ -9,8 +9,8 @@ target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 {
; CHECK-LABEL: define void @test_(
-; CHECK-NEXT: fadd contract <4 x float> {{.*}}
-; CHECK-NEXT: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}})
+; CHECK: fadd contract <4 x float> {{.*}}
+; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}})
;
%invariant.gep = getelementptr i8, ptr %1, i64 -4
%.promoted = load float, ptr %0, align 4
@@ -43,5 +43,5 @@ attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "ta
!3 = !{!"llvm.loop.vectorize.reassociation.enable", i1 true}
; CHECK-NOT: llvm.loop.vectorize.reassociation.enable
-; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: !{!"llvm.loop.unroll.runtime.disable"}
>From 5ba9cbd40cbb8dcd2129060cf171655d9efd1c58 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Mon, 2 Jun 2025 20:02:32 -0700
Subject: [PATCH 3/5] Made metadata specific to FP reductions.
---
llvm/docs/LangRef.rst | 22 +++++----
.../Vectorize/LoopVectorizationLegality.h | 16 ++++---
.../Vectorize/LoopVectorizationLegality.cpp | 46 ++++++++++++-------
.../Transforms/Vectorize/LoopVectorize.cpp | 5 +-
.../LoopVectorize/reduction-reassociate.ll | 6 +--
5 files changed, 57 insertions(+), 38 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b0f42bafd85c1..ed5fc5b6c5769 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7593,21 +7593,23 @@ Note that setting ``llvm.loop.interleave.count`` to 1 disables interleaving
multiple iterations of the loop. If ``llvm.loop.interleave.count`` is set to 0
then the interleave count will be determined automatically.
-'``llvm.loop.vectorize.reassociation.enable``' Metadata
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+'``llvm.loop.vectorize.reassociate_fpreductions.enable``' Metadata
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-This metadata selectively allows or disallows reassociating computations,
-which otherwise may be unsafe to reassociate, during the loop vectorization.
-For example, a floating point ``ADD`` reduction without ``reassoc`` fast-math
-flags may be vectorized provided that this metadata allows it. The first
-operand is the string ``llvm.loop.vectorize.reassociation.enable``
+This metadata selectively allows or disallows reassociating floating-point
+reductions, which otherwise may be unsafe to reassociate, during the loop
+vectorization. For example, a floating point ``ADD`` reduction without
+``reassoc`` fast-math flags may be vectorized provided that this metadata
+allows it. The first operand is the string
+``llvm.loop.vectorize.reassociate_fpreductions.enable``
and the second operand is a bit. If the bit operand value is 1 unsafe
-reassociations aqre enabled. A value of 0 disables unsafe reassociations.
+reduction reassociations are enabled. A value of 0 disables unsafe
+reduction reassociations.
.. code-block:: llvm
- !0 = !{!"llvm.loop.vectorize.reassociation.enable", i1 0}
- !1 = !{!"llvm.loop.vectorize.reassociation.enable", i1 1}
+ !0 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 0}
+ !1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 1}
'``llvm.loop.vectorize.enable``' Metadata
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
index fb91eb022daf6..5911501ca2d3e 100644
--- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
+++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h
@@ -65,7 +65,7 @@ class LoopVectorizeHints {
HK_ISVECTORIZED,
HK_PREDICATE,
HK_SCALABLE,
- HK_REASSOCIATE,
+ HK_REASSOCIATE_FP_REDUCTIONS,
};
/// Hint - associates name and validation with the hint value.
@@ -98,9 +98,9 @@ class LoopVectorizeHints {
/// Says whether we should use fixed width or scalable vectorization.
Hint Scalable;
- /// Says whether unsafe reassociation of computations is allowed
+ /// Says whether unsafe reassociation of reductions is allowed
/// during the loop vectorization.
- Hint Reassociate;
+ Hint ReassociateFPReductions;
/// Return the loop metadata prefix.
static StringRef Prefix() { return "llvm.loop."; }
@@ -167,11 +167,11 @@ class LoopVectorizeHints {
return (ScalableForceKind)Scalable.Value == SK_FixedWidthOnly;
}
- enum ForceKind getReassociate() const {
- if ((ForceKind)Reassociate.Value == FK_Undefined &&
+ enum ForceKind getReassociateFPReductions() const {
+ if ((ForceKind)ReassociateFPReductions.Value == FK_Undefined &&
hasDisableAllTransformsHint(TheLoop))
return FK_Disabled;
- return (ForceKind)Reassociate.Value;
+ return (ForceKind)ReassociateFPReductions.Value;
}
/// If hints are provided that force vectorization, use the AlwaysPrint
@@ -185,6 +185,10 @@ class LoopVectorizeHints {
/// error accumulates in the loop.
bool allowReordering() const;
+ /// Returns true iff the loop hints allow reassociating floating-point
+ /// reductions for the purpose of vectorization.
+ bool allowFPReductionReassociation() const;
+
bool isPotentiallyUnsafe() const {
// Avoid FP vectorization if the target is unsure about proper support.
// This may be related to the SIMD unit in the target not handling
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ec3194f754664..dffff6f7278a1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -97,7 +97,7 @@ bool LoopVectorizeHints::Hint::validate(unsigned Val) {
case HK_ISVECTORIZED:
case HK_PREDICATE:
case HK_SCALABLE:
- case HK_REASSOCIATE:
+ case HK_REASSOCIATE_FP_REDUCTIONS:
return (Val == 0 || Val == 1);
}
return false;
@@ -113,8 +113,8 @@ LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
- Reassociate("vectorize.reassociation.enable", FK_Undefined,
- HK_REASSOCIATE),
+ ReassociateFPReductions("vectorize.reassociate_fpreductions.enable",
+ FK_Undefined, HK_REASSOCIATE_FP_REDUCTIONS),
TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -254,10 +254,14 @@ bool LoopVectorizeHints::allowReordering() const {
ElementCount EC = getWidth();
return HintsAllowReordering &&
(getForce() == LoopVectorizeHints::FK_Enabled ||
- getReassociate() == LoopVectorizeHints::FK_Enabled ||
EC.getKnownMinValue() > 1);
}
+bool LoopVectorizeHints::allowFPReductionReassociation() const {
+ return HintsAllowReordering &&
+ getReassociateFPReductions() == LoopVectorizeHints::FK_Enabled;
+}
+
void LoopVectorizeHints::getHintsFromMetadata() {
MDNode *LoopID = TheLoop->getLoopID();
if (!LoopID)
@@ -304,8 +308,13 @@ void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
return;
unsigned Val = C->getZExtValue();
- Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized,
- &Predicate, &Scalable, &Reassociate};
+ Hint *Hints[] = {&Width,
+ &Interleave,
+ &Force,
+ &IsVectorized,
+ &Predicate,
+ &Scalable,
+ &ReassociateFPReductions};
for (auto *H : Hints) {
if (Name == H->Name) {
if (H->validate(Val))
@@ -1315,22 +1324,25 @@ bool LoopVectorizationLegality::canVectorizeFPMath(
return true;
// If the above is false, we have ExactFPMath & do not allow reordering.
- // If the EnableStrictReductions flag is set, first check if we have any
- // Exact FP induction vars, which we cannot vectorize.
- if (!EnableStrictReductions ||
- any_of(getInductionVars(), [&](auto &Induction) -> bool {
+ // First check if we have any Exact FP induction vars, which we cannot
+ // vectorize.
+ if (any_of(getInductionVars(), [&](auto &Induction) -> bool {
InductionDescriptor IndDesc = Induction.second;
return IndDesc.getExactFPMathInst();
}))
return false;
- // We can now only vectorize if all reductions with Exact FP math also
- // have the isOrdered flag set, which indicates that we can move the
- // reduction operations in-loop.
- return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
- const RecurrenceDescriptor &RdxDesc = Reduction.second;
- return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
- }));
+ // We can now only vectorize if EnableStrictReductions flag is set and
+ // all reductions with Exact FP math also have the isOrdered flag set,
+ // which indicates that we can move the reduction operations in-loop.
+ // If the hints allow reassociating FP reductions, then skip
+ // all the checks.
+ return (Hints->allowFPReductionReassociation() ||
+ all_of(getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ return !RdxDesc.hasExactFPMath() ||
+ (EnableStrictReductions && RdxDesc.isOrdered());
+ }));
}
bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fc8ebebcf21b7..608715453e40d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1000,9 +1000,10 @@ class LoopVectorizationCostModel {
/// Returns true if we should use strict in-order reductions for the given
/// RdxDesc. This is true if the -enable-strict-reductions flag is passed,
/// the IsOrdered flag of RdxDesc is set and we do not allow reordering
- /// of FP operations.
+ /// of FP operations or FP reductions.
bool useOrderedReductions(const RecurrenceDescriptor &RdxDesc) const {
- return !Hints->allowReordering() && RdxDesc.isOrdered();
+ return !Hints->allowReordering() &&
+ !Hints->allowFPReductionReassociation() && RdxDesc.isOrdered();
}
/// \returns The smallest bitwidth each instruction can be represented with.
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
index e35ad858b8d89..08b08d2d405b6 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
@@ -1,5 +1,5 @@
; Check that the loop with a floating-point reduction is vectorized
-; due to llvm.loop.vectorize.reassociation.enable metadata.
+; due to llvm.loop.vectorize.reassociate_fpreductions.enable metadata.
; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s
source_filename = "FIRModule"
@@ -40,8 +40,8 @@ attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "ta
!0 = !{!"flang version 21.0.0"}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = distinct !{!2, !3}
-!3 = !{!"llvm.loop.vectorize.reassociation.enable", i1 true}
+!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true}
-; CHECK-NOT: llvm.loop.vectorize.reassociation.enable
+; CHECK-NOT: llvm.loop.vectorize.reassociate_fpreductions.enable
; CHECK: !{!"llvm.loop.isvectorized", i32 1}
; CHECK: !{!"llvm.loop.unroll.runtime.disable"}
>From 91f390e8d82e4e5b8c6667c54f621c7d6c842637 Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Tue, 10 Jun 2025 17:44:22 -0700
Subject: [PATCH 4/5] Updated LangRef and the test.
---
llvm/docs/LangRef.rst | 14 +-
.../LoopVectorize/reduction-reassociate.ll | 151 ++++++++++++++----
2 files changed, 130 insertions(+), 35 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index ed5fc5b6c5769..6cd7321d0c4e0 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -7597,7 +7597,7 @@ then the interleave count will be determined automatically.
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
This metadata selectively allows or disallows reassociating floating-point
-reductions, which otherwise may be unsafe to reassociate, during the loop
+reductions, which otherwise may be unsafe to reassociate, during loop
vectorization. For example, a floating point ``ADD`` reduction without
``reassoc`` fast-math flags may be vectorized provided that this metadata
allows it. The first operand is the string
@@ -7606,6 +7606,18 @@ and the second operand is a bit. If the bit operand value is 1 unsafe
reduction reassociations are enabled. A value of 0 disables unsafe
reduction reassociations.
+Note that the reassociation of floating point reductions that is allowed
+by other means is considered safe, so this metadata is a no-op
+in such cases.
+
+For example, reassociation of a floating-point reduction
+in a loop with ``!{!"llvm.loop.vectorize.enable", i1 1}`` metadata is allowed
+regardless of the value of
+``llvm.loop.vectorize.reassociate_fpreductions.enable``.
+
+Similarly, reassociation is always allowed for reduction operations
+with ``reassoc`` fast-math flags.
+
.. code-block:: llvm
!0 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 0}
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
index 08b08d2d405b6..1e760c841f3dd 100644
--- a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
+++ b/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
@@ -1,47 +1,130 @@
-; Check that the loop with a floating-point reduction is vectorized
-; due to llvm.loop.vectorize.reassociate_fpreductions.enable metadata.
-; RUN: opt -passes=loop-vectorize -S < %s 2>&1 | FileCheck %s
+; Check that the loops with a floating-point reduction are vectorized
+; according to llvm.loop.vectorize.reassociate_fpreductions.enable metadata.
+; RUN: opt -passes=loop-vectorize -S < %s | FileCheck %s
-source_filename = "FIRModule"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; Function Attrs: nofree norecurse nosync nounwind memory(argmem: readwrite)
-define void @test_(ptr captures(none) %0, ptr readonly captures(none) %1) local_unnamed_addr #0 {
-; CHECK-LABEL: define void @test_(
+define float @test_enable(ptr readonly captures(none) %array, float %init) {
+; CHECK-LABEL: define float @test_enable(
; CHECK: fadd contract <4 x float> {{.*}}
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD0:[0-9]+]]
; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}})
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD3:[0-9]+]]
;
- %invariant.gep = getelementptr i8, ptr %1, i64 -4
- %.promoted = load float, ptr %0, align 4
- br label %3
-
-3: ; preds = %2, %3
- %indvars.iv = phi i64 [ 1, %2 ], [ %indvars.iv.next, %3 ]
- %4 = phi float [ %.promoted, %2 ], [ %6, %3 ]
- %gep = getelementptr float, ptr %invariant.gep, i64 %indvars.iv
- %5 = load float, ptr %gep, align 4
- %6 = fadd contract float %4, %5
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond.not = icmp eq i64 %indvars.iv.next, 1001
- br i1 %exitcond.not, label %7, label %3, !llvm.loop !2
-
-7: ; preds = %3
- %.lcssa = phi float [ %6, %3 ]
- store float %.lcssa, ptr %0, align 4
- ret void
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red = phi float [ %init, %entry ], [ %red.next, %loop ]
+ %gep = getelementptr float, ptr %array, i64 %iv
+ %element = load float, ptr %gep, align 4
+ %red.next = fadd contract float %red, %element
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !0
+
+exit:
+ %result = phi float [ %red.next, %loop ]
+ ret float %result
}
-attributes #0 = { nofree norecurse nosync nounwind memory(argmem: readwrite) "target-cpu"="x86-64" }
+; The reduction is unsafe, and the metadata does not allow
+; vectorizing it:
+define float @test_disable(ptr readonly captures(none) %array, float %init) {
+; CHECK-LABEL: define float @test_disable(
+; CHECK-NOT: <4 x float>
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD4:[0-9]+]]
+;
+entry:
+ br label %loop
-!llvm.ident = !{!0}
-!llvm.module.flags = !{!1}
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red = phi float [ %init, %entry ], [ %red.next, %loop ]
+ %gep = getelementptr float, ptr %array, i64 %iv
+ %element = load float, ptr %gep, align 4
+ %red.next = fadd contract float %red, %element
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2
+
+exit:
+ %result = phi float [ %red.next, %loop ]
+ ret float %result
+}
+
+; Forced vectorization "makes" the reduction reassociation safe,
+; so setting llvm.loop.vectorize.reassociate_fpreductions.enable
+; to false has no effect:
+define float @test_disable_with_forced_vectorization(ptr readonly captures(none) %array, float %init) {
+; CHECK-LABEL: define float @test_disable_with_forced_vectorization(
+; CHECK: fadd contract <4 x float> {{.*}}
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD6:[0-9]+]]
+; CHECK: call contract float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> {{.*}})
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD7:[0-9]+]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red = phi float [ %init, %entry ], [ %red.next, %loop ]
+ %gep = getelementptr float, ptr %array, i64 %iv
+ %element = load float, ptr %gep, align 4
+ %red.next = fadd contract float %red, %element
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !4
+
+exit:
+ %result = phi float [ %red.next, %loop ]
+ ret float %result
+}
+
+; 'fast' math makes reduction reassociation safe,
+; so setting llvm.loop.vectorize.reassociate_fpreductions.enable
+; to false has no effect:
+define float @test_disable_with_fast_math(ptr readonly captures(none) %array, float %init) {
+; CHECK-LABEL: define float @test_disable_with_fast_math(
+; CHECK: fadd fast <4 x float> {{.*}}
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD8:[0-9]+]]
+; CHECK: call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> {{.*}})
+; CHECK: br i1 %{{.*}}, !llvm.loop ![[MD9:[0-9]+]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %red = phi float [ %init, %entry ], [ %red.next, %loop ]
+ %gep = getelementptr float, ptr %array, i64 %iv
+ %element = load float, ptr %gep, align 4
+ %red.next = fadd fast float %red, %element
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond.not, label %exit, label %loop, !llvm.loop !2
+
+exit:
+ %result = phi float [ %red.next, %loop ]
+ ret float %result
+}
-!0 = !{!"flang version 21.0.0"}
-!1 = !{i32 2, !"Debug Info Version", i32 3}
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true}
!2 = distinct !{!2, !3}
-!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 true}
+!3 = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 false}
+!4 = distinct !{!4, !3, !5}
+!5 = !{!"llvm.loop.vectorize.enable", i1 true}
; CHECK-NOT: llvm.loop.vectorize.reassociate_fpreductions.enable
-; CHECK: !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: ![[MD0]] = distinct !{![[MD0]], ![[MD1:[0-9]+]], ![[MD2:[0-9]+]]}
+; CHECK: ![[MD1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: ![[MD2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: ![[MD3]] = distinct !{![[MD3]], ![[MD2]], ![[MD1]]}
+; CHECK: ![[MD4]] = distinct !{![[MD4]], ![[MD5:[0-9]+]]}
+; CHECK: ![[MD5]] = !{!"llvm.loop.vectorize.reassociate_fpreductions.enable", i1 false}
+; CHECK: ![[MD6]] = distinct !{![[MD6]], ![[MD1]], ![[MD2]]}
+; CHECK: ![[MD7]] = distinct !{![[MD7]], ![[MD2]], ![[MD1]]}
+; CHECK: ![[MD8]] = distinct !{![[MD8]], ![[MD1]], ![[MD2]]}
+; CHECK: ![[MD9]] = distinct !{![[MD9]], ![[MD2]], ![[MD1]]}
>From 676dedebdb336661b44b25fce2ba3f587c7eb04d Mon Sep 17 00:00:00 2001
From: Slava Zakharin <szakharin at nvidia.com>
Date: Wed, 11 Jun 2025 11:47:55 -0700
Subject: [PATCH 5/5] Moved test to X86 dir.
---
.../Transforms/LoopVectorize/{ => X86}/reduction-reassociate.ll | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename llvm/test/Transforms/LoopVectorize/{ => X86}/reduction-reassociate.ll (100%)
diff --git a/llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll b/llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll
similarity index 100%
rename from llvm/test/Transforms/LoopVectorize/reduction-reassociate.ll
rename to llvm/test/Transforms/LoopVectorize/X86/reduction-reassociate.ll
More information about the llvm-commits
mailing list