[llvm] c73de97 - [IVDesciptors] Support detecting reductions with vector instructions. (#166353)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 24 03:12:11 PST 2025
Author: Julian Nagele
Date: 2025-11-24T11:12:06Z
New Revision: c73de9777e67df4411020a7909f0eadbbf1de08b
URL: https://github.com/llvm/llvm-project/commit/c73de9777e67df4411020a7909f0eadbbf1de08b
DIFF: https://github.com/llvm/llvm-project/commit/c73de9777e67df4411020a7909f0eadbbf1de08b.diff
LOG: [IVDesciptors] Support detecting reductions with vector instructions. (#166353)
In combination with https://github.com/llvm/llvm-project/pull/149470
this will introduce parallel accumulators when unrolling reductions with
vector instructions. See also
https://github.com/llvm/llvm-project/pull/166630, which aims to
introduce parallel accumulators for FP reductions.
Added:
Modified:
llvm/lib/Analysis/IVDescriptors.cpp
llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index 641850b46bbd8..26c7f805bdb6d 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -270,10 +270,12 @@ bool RecurrenceDescriptor::AddReductionVar(
// resulting from the type promotion performed by InstCombine. Vector
// operations are not limited to the legal integer widths, so we may be able
// to evaluate the reduction in the narrower width.
- if (RecurrenceType->isFloatingPointTy()) {
+ // Check the scalar type to handle both scalar and vector types.
+ Type *ScalarTy = RecurrenceType->getScalarType();
+ if (ScalarTy->isFloatingPointTy()) {
if (!isFloatingPointRecurrenceKind(Kind))
return false;
- } else if (RecurrenceType->isIntegerTy()) {
+ } else if (ScalarTy->isIntegerTy()) {
if (!isIntegerRecurrenceKind(Kind))
return false;
if (!isMinMaxRecurrenceKind(Kind))
diff --git a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
index 2d48d20ba9c5c..220a4a29a3041 100644
--- a/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/partial-unroll-reductions.ll
@@ -358,6 +358,7 @@ loop:
exit:
ret float %rdx.next
}
+
define i32 @test_smin(ptr %src, i64 %n) {
; CHECK-LABEL: define i32 @test_smin(
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
@@ -623,3 +624,56 @@ loop:
exit:
ret i32 %rdx.next
}
+
+define <4 x i32> @test_vector_add(ptr %p, i64 %n, <4 x i32> %start) {
+; CHECK-LABEL: define <4 x i32> @test_vector_add(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[N:%.*]], <4 x i32> [[START:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX_1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX_2:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[RDX_NEXT_2:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX_3:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[RDX_NEXT_3:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi <4 x i32> [ [[START]], %[[ENTRY]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV]]
+; CHECK-NEXT: [[L:%.*]] = load <4 x i32>, ptr [[GEP]], align 16
+; CHECK-NEXT: [[RDX_NEXT]] = add <4 x i32> [[RDX]], [[L]]
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[L_1:%.*]] = load <4 x i32>, ptr [[GEP_1]], align 16
+; CHECK-NEXT: [[RDX_NEXT_1]] = add <4 x i32> [[RDX_1]], [[L_1]]
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[L_2:%.*]] = load <4 x i32>, ptr [[GEP_2]], align 16
+; CHECK-NEXT: [[RDX_NEXT_2]] = add <4 x i32> [[RDX_2]], [[L_2]]
+; CHECK-NEXT: [[IV_NEXT_3]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[P]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[L_3:%.*]] = load <4 x i32>, ptr [[GEP_3]], align 16
+; CHECK-NEXT: [[RDX_NEXT_3]] = add <4 x i32> [[RDX_3]], [[L_3]]
+; CHECK-NEXT: [[EC_3:%.*]] = icmp ne i64 [[IV_NEXT_3]], 1000
+; CHECK-NEXT: br i1 [[EC_3]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RDX_NEXT_LCSSA:%.*]] = phi <4 x i32> [ [[RDX_NEXT_3]], %[[LOOP]] ]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[RDX_NEXT_1]], [[RDX_NEXT]]
+; CHECK-NEXT: [[BIN_RDX1:%.*]] = add <4 x i32> [[RDX_NEXT_2]], [[BIN_RDX]]
+; CHECK-NEXT: [[BIN_RDX2:%.*]] = add <4 x i32> [[RDX_NEXT_3]], [[BIN_RDX1]]
+; CHECK-NEXT: ret <4 x i32> [[BIN_RDX2]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %rdx = phi <4 x i32> [ %start, %entry ], [ %rdx.next, %loop ]
+ %iv.next = add i64 %iv, 1
+ %gep = getelementptr inbounds nuw <4 x i32>, ptr %p, i64 %iv
+ %l = load <4 x i32>, ptr %gep, align 16
+ %rdx.next = add <4 x i32> %rdx, %l
+ %ec = icmp ne i64 %iv.next, 1000
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret <4 x i32> %rdx.next
+}
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
index a5ac2cf46653d..fb1f2fcf5c190 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-unroll-reductions.ll
@@ -220,6 +220,72 @@ exit:
ret i32 %res
}
+define <4 x i32> @test_vector_add_reduction(ptr %a, i64 %n) {
+; CHECK-LABEL: define <4 x i32> @test_vector_add_reduction(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], -1
+; CHECK-NEXT: [[XTRAITER:%.*]] = and i64 [[N]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 1
+; CHECK-NEXT: br i1 [[TMP1]], label %[[LOOP_EPIL_PREHEADER:.*]], label %[[ENTRY_NEW:.*]]
+; CHECK: [[ENTRY_NEW]]:
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = sub i64 [[N]], [[XTRAITER]]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[IV_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX_1:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY_NEW]] ], [ [[RDX_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY_NEW]] ], [ [[RDX_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[NITER:%.*]] = phi i64 [ 0, %[[ENTRY_NEW]] ], [ [[NITER_NEXT_1:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_A:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV]]
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[GEP_A]], align 16
+; CHECK-NEXT: [[RDX_NEXT]] = add <4 x i32> [[RDX]], [[TMP2]]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[GEP_A_1]], align 16
+; CHECK-NEXT: [[RDX_NEXT_1]] = add <4 x i32> [[RDX_1]], [[TMP3]]
+; CHECK-NEXT: [[IV_NEXT_1]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT: [[NITER_NEXT_1]] = add i64 [[NITER]], 2
+; CHECK-NEXT: [[NITER_NCMP_1:%.*]] = icmp eq i64 [[NITER_NEXT_1]], [[UNROLL_ITER]]
+; CHECK-NEXT: br i1 [[NITER_NCMP_1]], label %[[EXIT_UNR_LCSSA:.*]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[EXIT_UNR_LCSSA]]:
+; CHECK-NEXT: [[RES_PH:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT: [[IV_UNR:%.*]] = phi i64 [ [[IV_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT: [[RDX_UNR:%.*]] = phi <4 x i32> [ [[RDX_NEXT_1]], %[[LOOP]] ]
+; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[RDX_NEXT_1]], [[RDX_NEXT]]
+; CHECK-NEXT: [[LCMP_MOD:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: br i1 [[LCMP_MOD]], label %[[LOOP_EPIL_PREHEADER]], label %[[EXIT:.*]]
+; CHECK: [[LOOP_EPIL_PREHEADER]]:
+; CHECK-NEXT: [[IV_EPIL_INIT:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_UNR]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[RDX_EPIL_INIT:%.*]] = phi <4 x i32> [ zeroinitializer, %[[ENTRY]] ], [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ]
+; CHECK-NEXT: [[LCMP_MOD2:%.*]] = icmp ne i64 [[XTRAITER]], 0
+; CHECK-NEXT: call void @llvm.assume(i1 [[LCMP_MOD2]])
+; CHECK-NEXT: br label %[[LOOP_EPIL:.*]]
+; CHECK: [[LOOP_EPIL]]:
+; CHECK-NEXT: [[GEP_A_EPIL:%.*]] = getelementptr inbounds nuw <4 x i32>, ptr [[A]], i64 [[IV_EPIL_INIT]]
+; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[GEP_A_EPIL]], align 16
+; CHECK-NEXT: [[RDX_NEXT_EPIL:%.*]] = add <4 x i32> [[RDX_EPIL_INIT]], [[TMP4]]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi <4 x i32> [ [[BIN_RDX]], %[[EXIT_UNR_LCSSA]] ], [ [[RDX_NEXT_EPIL]], %[[LOOP_EPIL]] ]
+; CHECK-NEXT: ret <4 x i32> [[RES]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %rdx = phi <4 x i32> [ zeroinitializer, %entry ], [ %rdx.next, %loop ]
+ %gep.a = getelementptr inbounds nuw <4 x i32>, ptr %a, i64 %iv
+ %1 = load <4 x i32>, ptr %gep.a, align 16
+ %rdx.next = add <4 x i32> %rdx, %1
+ %iv.next = add nuw nsw i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %n
+ br i1 %ec, label %exit, label %loop, !llvm.loop !0
+
+exit:
+ %res = phi <4 x i32> [ %rdx.next, %loop ]
+ ret <4 x i32> %res
+}
!0 = distinct !{!0, !1}
@@ -234,4 +300,5 @@ exit:
; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META1]]}
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
;.
More information about the llvm-commits
mailing list