[llvm] LV/test: regen a couple of tests with UTC (NFC) (PR #107225)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 4 04:34:09 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
---
Patch is 97.37 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/107225.diff
2 Files Affected:
- (modified) llvm/test/Transforms/LoopVectorize/if-reduction.ll (+1069-91)
- (modified) llvm/test/Transforms/LoopVectorize/phi-cost.ll (+117-16)
``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/if-reduction.ll b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
index 0d5871e24c5247..89b3c0528e5e69 100644
--- a/llvm/test/Transforms/LoopVectorize/if-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/if-reduction.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s | FileCheck %s
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
@@ -14,11 +15,59 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; return sum;
; }
-; CHECK-LABEL: @fcmp_0_fadd_select1(
-; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], zeroinitializer
-; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
-; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
define float @fcmp_0_fadd_select1(ptr noalias %x, i32 %N) nounwind readonly {
+; CHECK-LABEL: define float @fcmp_0_fadd_select1(
+; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_HEADER]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
+; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret float [[TMP9]]
+;
entry:
%cmp.1 = icmp sgt i32 %N, 0
br i1 %cmp.1, label %for.header, label %for.end
@@ -56,11 +105,59 @@ for.end: ; preds = %for.body, %entry
; return sum;
; }
-; CHECK-LABEL: @fcmp_0_fadd_select2(
-; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], zeroinitializer
-; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
-; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
define double @fcmp_0_fadd_select2(ptr noalias %x, i32 %N) nounwind readonly {
+; CHECK-LABEL: define double @fcmp_0_fadd_select2(
+; CHECK-SAME: ptr noalias [[X:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_HEADER]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], 0.000000e+00
+; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
+; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret double [[TMP9]]
+;
entry:
%cmp.1 = icmp sgt i32 %N, 0
br i1 %cmp.1, label %for.header, label %for.end
@@ -99,11 +196,61 @@ for.end: ; preds = %for.body, %entry
; return sum;
; }
-; CHECK-LABEL: @fcmp_val_fadd_select1(
-; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %broadcast.splat
-; CHECK: %[[V3:.*]] = fadd fast <4 x float> %[[V0]], %[[V2:.*]]
-; CHECK: select <4 x i1> %[[V1]], <4 x float> %[[V3]], <4 x float> %[[V2]]
define float @fcmp_val_fadd_select1(ptr noalias %x, float %y, i32 %N) nounwind readonly {
+; CHECK-LABEL: define float @fcmp_val_fadd_select1(
+; CHECK-SAME: ptr noalias [[X:%.*]], float [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_HEADER]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[Y]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x float> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x float> [[TMP4]], <4 x float> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi float [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt float [[TMP8]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[TMP8]], [[SUM_1]]
+; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], float [[ADD]], float [[SUM_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi float [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi float [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret float [[TMP9]]
+;
entry:
%cmp.1 = icmp sgt i32 %N, 0
br i1 %cmp.1, label %for.header, label %for.end
@@ -142,11 +289,61 @@ for.end: ; preds = %for.body, %entry
; return sum;
; }
-; CHECK-LABEL: @fcmp_val_fadd_select2(
-; CHECK: %[[V1:.*]] = fcmp fast ogt <4 x double> %[[V0:.*]], %broadcast.splat
-; CHECK: %[[V3:.*]] = fadd fast <4 x double> %[[V0]], %[[V2:.*]]
-; CHECK: select <4 x i1> %[[V1]], <4 x double> %[[V3]], <4 x double> %[[V2]]
define double @fcmp_val_fadd_select2(ptr noalias %x, double %y, i32 %N) nounwind readonly {
+; CHECK-LABEL: define double @fcmp_val_fadd_select2(
+; CHECK-SAME: ptr noalias [[X:%.*]], double [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_HEADER]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[Y]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x double> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds double, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp fast ogt <4 x double> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x double> [[WIDE_LOAD]], [[VEC_PHI]]
+; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x double> [[TMP4]], <4 x double> [[VEC_PHI]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[TMP7:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[TMP5]])
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[ZEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[FOR_HEADER]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi double [ [[TMP7]], %[[MIDDLE_BLOCK]] ], [ 0.000000e+00, %[[FOR_HEADER]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[SUM_1:%.*]] = phi double [ [[BC_MERGE_RDX]], %[[SCALAR_PH]] ], [ [[SUM_2:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[X]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = load double, ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = fcmp fast ogt double [[TMP8]], [[Y]]
+; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[TMP8]], [[SUM_1]]
+; CHECK-NEXT: [[SUM_2]] = select i1 [[CMP_2]], double [[ADD]], double [[SUM_1]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[ZEXT]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END_LOOPEXIT]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[FOR_END_LOOPEXIT]]:
+; CHECK-NEXT: [[SUM_2_LCSSA:%.*]] = phi double [ [[SUM_2]], %[[FOR_BODY]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_END]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: [[TMP9:%.*]] = phi double [ 0.000000e+00, %[[ENTRY]] ], [ [[SUM_2_LCSSA]], %[[FOR_END_LOOPEXIT]] ]
+; CHECK-NEXT: ret double [[TMP9]]
+;
entry:
%cmp.1 = icmp sgt i32 %N, 0
br i1 %cmp.1, label %for.header, label %for.end
@@ -186,11 +383,64 @@ for.end: ; preds = %for.body, %entry
; return sum;
; }
-; CHECK-LABEL: @fcmp_array_elm_fadd_select1(
-; CHECK: %[[V2:.*]] = fcmp fast ogt <4 x float> %[[V0:.*]], %[[V1:.*]]
-; CHECK: %[[V4:.*]] = fadd fast <4 x float> %[[V0]], %[[V3:.*]]
-; CHECK: select <4 x i1> %[[V2]], <4 x float> %[[V4]], <4 x float> %[[V3]]
define float @fcmp_array_elm_fadd_select1(ptr noalias %x, ptr noalias %y, i32 %N) nounwind readonly {
+; CHECK-LABEL: define float @fcmp_array_elm_fadd_select1(
+; CHECK-SAME: ptr noalias [[X:%.*]], ptr noalias [[Y:%.*]], i32 [[N:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[N]], 0
+; CHECK-NEXT: br i1 [[CMP_1]], label %[[FOR_HEADER:.*]], label %[[FOR_END:.*]]
+; CHECK: [[FOR_HEADER]]:
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[ZEXT]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[ZEXT]], 4
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[ZEXT]], [[N_MOD_VF]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, ptr [[X]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, ptr [[TMP1]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, ptr [[Y]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[TMP3]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, ptr [[TMP4]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast ogt <4...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/107225
More information about the llvm-commits
mailing list