[llvm] [LV][NFC] Regen some partial reduction tests (PR #129047)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 27 05:05:53 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: David Sherwood (david-arm)
<details>
<summary>Changes</summary>
A few test files appear to have been edited manually after being
generated with the update_test_checks script, which can make life
hard for developers who need to regenerate these tests in future
patches. Also, the tests still had this comment at the top:
; NOTE: Assertions have been autogenerated by ...
which could be confusing given those manual edits.
---
Patch is 61.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/129047.diff
3 Files Affected:
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll (+532)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-no-dotprod.ll (+31)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-sub.ll (+82)
``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
index 4e4a5c82c298a..092938866c65b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-chained.ll
@@ -48,6 +48,32 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-NEON-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE3]])
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEON: scalar.ph:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEON-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEON: for.cond.cleanup:
+; CHECK-NEON-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-NEON: for.body:
+; CHECK-NEON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUB]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-NEON-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-NEON-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-NEON-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-NEON-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-NEON-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-NEON-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEON-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-NEON-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-NEON-NEXT: [[SUB]] = sub i32 [[ADD]], [[MUL_AC]]
+; CHECK-NEON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEON-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEON-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]], !loop [[META4:![0-9]+]]
;
; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_add_sub(
; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -94,6 +120,32 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE: scalar.ph:
+; CHECK-SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE: for.cond.cleanup:
+; CHECK-SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE: for.body:
+; CHECK-SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUB]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-NEXT: [[SUB]] = sub i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]], !loop [[META4:![0-9]+]]
;
; CHECK-SVE-MAXBW-LABEL: define i32 @chained_partial_reduce_add_sub(
; CHECK-SVE-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
@@ -141,6 +193,32 @@ define i32 @chained_partial_reduce_add_sub(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE3]])
; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE-MAXBW: scalar.ph:
+; CHECK-SVE-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE-MAXBW: for.cond.cleanup:
+; CHECK-SVE-MAXBW-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-MAXBW-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE-MAXBW: for.body:
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUB]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[SUB]] = sub i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]], !loop [[META4:![0-9]+]]
;
entry:
%cmp28.not = icmp ult i32 %N, 2
@@ -213,6 +291,32 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-NEON-NEXT: [[TMP13:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE3]])
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEON: scalar.ph:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEON-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEON: for.cond.cleanup:
+; CHECK-NEON-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-NEON: for.body:
+; CHECK-NEON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-NEON-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-NEON-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-NEON-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-NEON-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-NEON-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-NEON-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEON-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-NEON-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-NEON-NEXT: [[ADD_2]] = add i32 [[ADD]], [[MUL_AC]]
+; CHECK-NEON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEON-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEON-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]], !loop [[META4]]
;
; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_add_add(
; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -259,6 +363,32 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE: scalar.ph:
+; CHECK-SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE: for.cond.cleanup:
+; CHECK-SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE: for.body:
+; CHECK-SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-NEXT: [[ADD_2]] = add i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]], !loop [[META4]]
;
; CHECK-SVE-MAXBW-LABEL: define i32 @chained_partial_reduce_add_add(
; CHECK-SVE-MAXBW-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -305,6 +435,32 @@ define i32 @chained_partial_reduce_add_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-MAXBW-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.nxv2i32(<vscale x 2 x i32> [[PARTIAL_REDUCE3]])
; CHECK-SVE-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE-MAXBW: scalar.ph:
+; CHECK-SVE-MAXBW-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-MAXBW-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP19]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-MAXBW-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE-MAXBW: for.cond.cleanup:
+; CHECK-SVE-MAXBW-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD_2:%.*]], [[FOR_BODY]] ], [ [[TMP19]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-MAXBW-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE-MAXBW: for.body:
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD_2]], [[FOR_BODY]] ]
+; CHECK-SVE-MAXBW-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-MAXBW-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-MAXBW-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[ADD:%.*]] = add nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-SVE-MAXBW-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-SVE-MAXBW-NEXT: [[ADD_2]] = add i32 [[ADD]], [[MUL_AC]]
+; CHECK-SVE-MAXBW-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-SVE-MAXBW-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-SVE-MAXBW-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]], !loop [[META4]]
;
entry:
%cmp28.not = icmp ult i32 %N, 2
@@ -378,6 +534,32 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-NEON-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[PARTIAL_REDUCE3]])
; CHECK-NEON-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-NEON-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-NEON: scalar.ph:
+; CHECK-NEON-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEON-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP15]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEON-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-NEON: for.cond.cleanup:
+; CHECK-NEON-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEON-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-NEON: for.body:
+; CHECK-NEON-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-NEON-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-NEON-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-NEON-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-NEON-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-NEON-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-NEON-NEXT: [[B_EXT:%.*]] = sext i8 [[B_VAL]] to i32
+; CHECK-NEON-NEXT: [[C_EXT:%.*]] = sext i8 [[C_VAL]] to i32
+; CHECK-NEON-NEXT: [[MUL_AB:%.*]] = mul nsw i32 [[A_EXT]], [[B_EXT]]
+; CHECK-NEON-NEXT: [[SUB:%.*]] = sub nsw i32 [[RES]], [[MUL_AB]]
+; CHECK-NEON-NEXT: [[MUL_AC:%.*]] = mul nsw i32 [[A_EXT]], [[C_EXT]]
+; CHECK-NEON-NEXT: [[ADD]] = add i32 [[SUB]], [[MUL_AC]]
+; CHECK-NEON-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEON-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
+; CHECK-NEON-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]], !loop [[META4]]
;
; CHECK-SVE-LABEL: define i32 @chained_partial_reduce_sub_add(
; CHECK-SVE-SAME: ptr [[A:%.*]], ptr [[B:%.*]], ptr [[C:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
@@ -424,6 +606,32 @@ define i32 @chained_partial_reduce_sub_add(ptr %a, ptr %b, ptr %c, i32 %N) #0 {
; CHECK-SVE-NEXT: [[TMP21:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP19]])
; CHECK-SVE-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-SVE-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
+; CHECK-SVE: scalar.ph:
+; CHECK-SVE-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-SVE-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
+; CHECK-SVE-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK-SVE: for.cond.cleanup:
+; CHECK-SVE-NEXT: [[RES_0_LCSSA:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
+; CHECK-SVE-NEXT: ret i32 [[RES_0_LCSSA]]
+; CHECK-SVE: for.body:
+; CHECK-SVE-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[RES:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ADD]], [[FOR_BODY]] ]
+; CHECK-SVE-NEXT: [[A_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[B_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[B]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[C_PTR:%.*]] = getelementptr inbounds nuw i8, ptr [[C]], i64 [[INDVARS_IV]]
+; CHECK-SVE-NEXT: [[A_VAL:%.*]] = load i8, ptr [[A_PTR]], align 1
+; CHECK-SVE-NEXT: [[B_VAL:%.*]] = load i8, ptr [[B_PTR]], align 1
+; CHECK-SVE-NEXT: [[C_VAL:%.*]] = load i8, ptr [[C_PTR]], align 1
+; CHECK-SVE-NEXT: [[A_EXT:%.*]] = sext i8 [[A_VAL]] to i32
+; CHECK-SVE-NEXT: [[B_EXT:%.*]] = sext i8 [[B...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/129047
More information about the llvm-commits mailing list