[llvm] [AArch64] NFC test where partial reduce intrinsic can be generated. (PR #136124)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 17 03:48:53 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Sushant Gokhale (sushgokh)
<details>
<summary>Changes</summary>
---
Patch is 48.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/136124.diff
1 Files Affected:
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll (+566)
``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
index 1b22523e9f5bd..4184c3ba0d987 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-dot-product.ll
@@ -3419,6 +3419,7 @@ define dso_local void @not_dotp_high_register_pressure(ptr %a, ptr %b, ptr %sum,
; CHECK-MAXBW-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
; CHECK-MAXBW-NEXT: br i1 [[CMP_N]], label [[FOR_COND_FOR_COND_CLEANUP_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK-MAXBW: scalar.ph:
+;
entry:
%cmp100 = icmp sgt i32 %n, 0
br i1 %cmp100, label %for.body.lr.ph, label %for.cond.cleanup
@@ -3530,9 +3531,574 @@ for.body: ; preds = %for.body.lr.ph, %fo
br i1 %exitcond.not, label %for.cond.for.cond.cleanup_crit_edge, label %for.body, !llvm.loop !8
}
+define void @usdot_i8_to_i32(ptr %A, ptr %B, i32 %K, ptr %sum, i32 %C2) #2 {
+; CHECK-INTERLEAVE1-LABEL: define void @usdot_i8_to_i32(
+; CHECK-INTERLEAVE1-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[K:%.*]], ptr [[SUM:%.*]], i32 [[C2:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-INTERLEAVE1-NEXT: entry:
+; CHECK-INTERLEAVE1-NEXT: [[CMP137:%.*]] = icmp sgt i32 [[K]], 0
+; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP137]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK-INTERLEAVE1: for.body.lr.ph:
+; CHECK-INTERLEAVE1-NEXT: [[TMP0:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[K]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP1:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP163:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP165:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP163]], i64 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP2:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP167:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP169:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP167]], i64 2
+; CHECK-INTERLEAVE1-NEXT: [[TMP3:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP171:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP173:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP171]], i64 3
+; CHECK-INTERLEAVE1-NEXT: [[TMP4:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP175:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP177:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP175]], i64 4
+; CHECK-INTERLEAVE1-NEXT: [[TMP5:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP179:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP181:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP179]], i64 5
+; CHECK-INTERLEAVE1-NEXT: [[TMP6:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP183:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP185:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP183]], i64 6
+; CHECK-INTERLEAVE1-NEXT: [[TMP7:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP187:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVE1-NEXT: [[INVARIANT_GEP189:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP187]], i64 7
+; CHECK-INTERLEAVE1-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVE1-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
+; CHECK-INTERLEAVE1-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP15]]
+; CHECK-INTERLEAVE1-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INTERLEAVE1: vector.ph:
+; CHECK-INTERLEAVE1-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVE1-NEXT: [[TMP18:%.*]] = mul i64 [[TMP10]], 4
+; CHECK-INTERLEAVE1-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP18]]
+; CHECK-INTERLEAVE1-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVE1-NEXT: [[TMP13:%.*]] = mul i64 [[TMP23]], 4
+; CHECK-INTERLEAVE1-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-INTERLEAVE1: vector.body:
+; CHECK-INTERLEAVE1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP56:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI6:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[VEC_PHI7:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVE1-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP57:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 4 x i8>, ptr [[TMP12]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP19:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD8]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP20:%.*]] = mul nsw <vscale x 4 x i32> [[TMP19]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP53]] = add <vscale x 4 x i32> [[TMP20]], [[VEC_PHI7]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP165]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[TMP16]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD9:%.*]] = load <vscale x 4 x i8>, ptr [[TMP17]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP24:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD9]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP25:%.*]] = mul nsw <vscale x 4 x i32> [[TMP24]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP28]] = add <vscale x 4 x i32> [[TMP25]], [[VEC_PHI6]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP169]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP21]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD10:%.*]] = load <vscale x 4 x i8>, ptr [[TMP22]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP29:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD10]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP30:%.*]] = mul nsw <vscale x 4 x i32> [[TMP29]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP33]] = add <vscale x 4 x i32> [[TMP30]], [[VEC_PHI5]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP173]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP27:%.*]] = getelementptr i8, ptr [[TMP26]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD11:%.*]] = load <vscale x 4 x i8>, ptr [[TMP27]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP34:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD11]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP35:%.*]] = mul nsw <vscale x 4 x i32> [[TMP34]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP38]] = add <vscale x 4 x i32> [[TMP35]], [[VEC_PHI4]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP31:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP177]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[TMP31]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD12:%.*]] = load <vscale x 4 x i8>, ptr [[TMP32]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP39:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD12]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP40:%.*]] = mul nsw <vscale x 4 x i32> [[TMP39]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP43]] = add <vscale x 4 x i32> [[TMP40]], [[VEC_PHI3]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP36:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP181]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP37:%.*]] = getelementptr i8, ptr [[TMP36]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD13:%.*]] = load <vscale x 4 x i8>, ptr [[TMP37]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP44:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD13]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP45:%.*]] = mul nsw <vscale x 4 x i32> [[TMP44]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP48]] = add <vscale x 4 x i32> [[TMP45]], [[VEC_PHI2]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP41:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP185]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP42:%.*]] = getelementptr i8, ptr [[TMP41]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD14:%.*]] = load <vscale x 4 x i8>, ptr [[TMP42]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP49:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD14]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP50:%.*]] = mul nsw <vscale x 4 x i32> [[TMP49]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP52]] = add <vscale x 4 x i32> [[TMP50]], [[VEC_PHI1]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP46:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP189]], i64 [[INDEX]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP47:%.*]] = getelementptr i8, ptr [[TMP46]], i32 0
+; CHECK-INTERLEAVE1-NEXT: [[WIDE_LOAD15:%.*]] = load <vscale x 4 x i8>, ptr [[TMP47]], align 1
+; CHECK-INTERLEAVE1-NEXT: [[TMP54:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD15]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVE1-NEXT: [[TMP55:%.*]] = mul nsw <vscale x 4 x i32> [[TMP54]], [[TMP57]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP56]] = add <vscale x 4 x i32> [[TMP55]], [[VEC_PHI]]
+; CHECK-INTERLEAVE1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP13]]
+; CHECK-INTERLEAVE1-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-INTERLEAVE1-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
+; CHECK-INTERLEAVE1: middle.block:
+; CHECK-INTERLEAVE1-NEXT: [[TMP58:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP56]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP59:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP52]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP60:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP48]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP61:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP43]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP62:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP38]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP63:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP33]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP64:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP28]])
+; CHECK-INTERLEAVE1-NEXT: [[TMP65:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32(<vscale x 4 x i32> [[TMP53]])
+; CHECK-INTERLEAVE1-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[WIDE_TRIP_COUNT]], [[N_VEC]]
+; CHECK-INTERLEAVE1-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-INTERLEAVE1: scalar.ph:
+;
+; CHECK-INTERLEAVED-LABEL: define void @usdot_i8_to_i32(
+; CHECK-INTERLEAVED-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i32 [[K:%.*]], ptr [[SUM:%.*]], i32 [[C2:%.*]]) #[[ATTR2:[0-9]+]] {
+; CHECK-INTERLEAVED-NEXT: entry:
+; CHECK-INTERLEAVED-NEXT: [[CMP137:%.*]] = icmp sgt i32 [[K]], 0
+; CHECK-INTERLEAVED-NEXT: br i1 [[CMP137]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CHECK-INTERLEAVED: for.body.lr.ph:
+; CHECK-INTERLEAVED-NEXT: [[TMP0:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[K]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP0]]
+; CHECK-INTERLEAVED-NEXT: [[TMP1:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP163:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP165:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP163]], i64 1
+; CHECK-INTERLEAVED-NEXT: [[TMP2:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP167:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP2]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP169:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP167]], i64 2
+; CHECK-INTERLEAVED-NEXT: [[TMP3:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP171:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP3]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP173:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP171]], i64 3
+; CHECK-INTERLEAVED-NEXT: [[TMP4:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP175:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP4]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP177:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP175]], i64 4
+; CHECK-INTERLEAVED-NEXT: [[TMP5:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP179:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP5]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP181:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP179]], i64 5
+; CHECK-INTERLEAVED-NEXT: [[TMP6:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP183:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP6]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP185:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP183]], i64 6
+; CHECK-INTERLEAVED-NEXT: [[TMP7:%.*]] = sext i32 [[C2]] to i64
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP187:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP7]]
+; CHECK-INTERLEAVED-NEXT: [[INVARIANT_GEP189:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP187]], i64 7
+; CHECK-INTERLEAVED-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 8
+; CHECK-INTERLEAVED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[WIDE_TRIP_COUNT]], [[TMP15]]
+; CHECK-INTERLEAVED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-INTERLEAVED: vector.ph:
+; CHECK-INTERLEAVED-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT: [[TMP33:%.*]] = mul i64 [[TMP10]], 8
+; CHECK-INTERLEAVED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[WIDE_TRIP_COUNT]], [[TMP33]]
+; CHECK-INTERLEAVED-NEXT: [[N_VEC:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[N_MOD_VF]]
+; CHECK-INTERLEAVED-NEXT: [[TMP43:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT: [[TMP13:%.*]] = mul i64 [[TMP43]], 8
+; CHECK-INTERLEAVED-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK-INTERLEAVED: vector.body:
+; CHECK-INTERLEAVED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP107:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI1:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP108:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI2:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP96:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI3:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP97:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI4:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP85:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI5:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP86:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI6:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP74:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI7:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP75:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI8:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP63:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI9:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP64:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI10:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP52:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI11:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP53:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI12:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI13:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP54:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI14:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[VEC_PHI15:%.*]] = phi <vscale x 4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP55:%.*]], [[VECTOR_BODY]] ]
+; CHECK-INTERLEAVED-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw i8, ptr [[A]], i64 [[INDEX]]
+; CHECK-INTERLEAVED-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i32 0
+; CHECK-INTERLEAVED-NEXT: [[TMP65:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT: [[TMP66:%.*]] = mul i64 [[TMP65]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP18:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP8]], i64 [[TMP66]]
+; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i8>, ptr [[TMP9]], align 1
+; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD16:%.*]] = load <vscale x 4 x i8>, ptr [[TMP18]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP20:%.*]] = zext <vscale x 4 x i8> [[WIDE_LOAD16]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
+; CHECK-INTERLEAVED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP11]], i32 0
+; CHECK-INTERLEAVED-NEXT: [[TMP23:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-INTERLEAVED-NEXT: [[TMP24:%.*]] = mul i64 [[TMP23]], 4
+; CHECK-INTERLEAVED-NEXT: [[TMP25:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[TMP24]]
+; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD17:%.*]] = load <vscale x 4 x i8>, ptr [[TMP12]], align 1
+; CHECK-INTERLEAVED-NEXT: [[WIDE_LOAD18:%.*]] = load <vscale x 4 x i8>, ptr [[TMP25]], align 1
+; CHECK-INTERLEAVED-NEXT: [[TMP76:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD17]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP77:%.*]] = sext <vscale x 4 x i8> [[WIDE_LOAD18]] to <vscale x 4 x i32>
+; CHECK-INTERLEAVED-NEXT: [[TMP28:%.*]] = mul nsw <vscale x 4 x i32> [[TMP76]], [[TMP19]]
+; CHECK-INTERLEAVED-NEXT: [[TMP29:%.*]] = mul nsw <vscale x 4 x i32> [[TMP77]], [[TMP20]]
+; CHECK-INTERLEAVED-NEXT: [[TMP30]] = add <vscale x 4 x i32> [[TMP28]], [[VEC_PHI14]]
+; CHECK-INTERLEAVED-NEXT: [[TMP55]] = add <vscale x 4 x i32> [[TMP29]], [[VEC_PHI15]]
+; CHECK-INTERLEAVED-NEXT: [[TMP16:%.*]] = gete...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/136124
More information about the llvm-commits mailing list