[llvm] [LV] Pre-commit test for #128062 (PR #164801)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 04:12:50 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ramkumar Ramachandra (artagnon)
<details>
<summary>Changes</summary>
In preparation for extending the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.
---
Patch is 24.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164801.diff
1 Files Affected:
- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction.ll (+356)
``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 5c62ca3ff3d01..9a655bba280e4 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -700,3 +700,359 @@ exit:
%result = add i64 %cast.ptr, %0
ret i64 %result
}
+
+; FIXME: Unprofitable vectorization.
+; Should be smarter about handling interleave groups.
+define void @pr128062(ptr %dst.start, i64 %n, i8 %a) {
+; DEFAULT-LABEL: @pr128062(
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; DEFAULT-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; DEFAULT-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; DEFAULT-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT: vector.ph:
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
+; DEFAULT: vector.body:
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; DEFAULT-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; DEFAULT-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; DEFAULT-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; DEFAULT-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; DEFAULT-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; DEFAULT-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; DEFAULT-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; DEFAULT-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; DEFAULT-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; DEFAULT-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; DEFAULT-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; DEFAULT-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; DEFAULT-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; DEFAULT-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; DEFAULT-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; DEFAULT-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; DEFAULT-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
+; DEFAULT-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
+; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
+; DEFAULT-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
+; DEFAULT-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; DEFAULT-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; DEFAULT-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; DEFAULT-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; DEFAULT-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; DEFAULT-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; DEFAULT-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; DEFAULT-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; DEFAULT-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; DEFAULT-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; DEFAULT-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; DEFAULT-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
+; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
+; DEFAULT-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
+; DEFAULT-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; DEFAULT: middle.block:
+; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; DEFAULT: scalar.ph:
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; DEFAULT-NEXT: br label [[LOOP:%.*]]
+; DEFAULT: loop:
+; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT: [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT: [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; DEFAULT-NEXT: [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; DEFAULT-NEXT: [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; DEFAULT-NEXT: [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; DEFAULT-NEXT: [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; DEFAULT-NEXT: [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; DEFAULT-NEXT: store i8 [[VAL_0]], ptr [[DST]], align 1
+; DEFAULT-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; DEFAULT-NEXT: [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; DEFAULT-NEXT: [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; DEFAULT-NEXT: [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; DEFAULT-NEXT: [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; DEFAULT-NEXT: [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; DEFAULT-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; DEFAULT-NEXT: [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT: [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; DEFAULT-NEXT: [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; DEFAULT-NEXT: [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; DEFAULT-NEXT: [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; DEFAULT-NEXT: store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; DEFAULT-NEXT: [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT: [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; DEFAULT-NEXT: [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; DEFAULT-NEXT: [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; DEFAULT-NEXT: [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; DEFAULT-NEXT: store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], -4
+; DEFAULT-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; DEFAULT-NEXT: br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; DEFAULT: exit:
+; DEFAULT-NEXT: ret void
+;
+; STRIDED-LABEL: @pr128062(
+; STRIDED-NEXT: entry:
+; STRIDED-NEXT: [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; STRIDED-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; STRIDED-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; STRIDED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; STRIDED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; STRIDED: vector.ph:
+; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; STRIDED-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
+; STRIDED: vector.body:
+; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; STRIDED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; STRIDED-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; STRIDED-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; STRIDED-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; STRIDED-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; STRIDED-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; STRIDED-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; STRIDED-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; STRIDED-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; STRIDED-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; STRIDED-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; STRIDED-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; STRIDED-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; STRIDED-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; STRIDED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; STRIDED-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; STRIDED-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; STRIDED-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; STRIDED-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; STRIDED-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; STRIDED-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; STRIDED-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; STRIDED-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; STRIDED-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; STRIDED-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; STRIDED-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; STRIDED-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; STRIDED-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
+; STRIDED-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
+; STRIDED-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
+; STRIDED-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
+; STRIDED-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; STRIDED-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; STRIDED-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; STRIDED-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; STRIDED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; STRIDED-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; STRIDED-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; STRIDED-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; STRIDED-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; STRIDED-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; STRIDED-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; STRIDED-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; STRIDED-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; STRIDED-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; STRIDED-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; STRIDED-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; STRIDED-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; STRIDED-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; STRIDED-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; STRIDED-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
+; STRIDED-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
+; STRIDED-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
+; STRIDED-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
+; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; STRIDED: middle.block:
+; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; STRIDED: scalar.ph:
+; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; STRIDED-NEXT: br label [[LOOP:%.*]]
+; STRIDED: loop:
+; STRIDED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; STRIDED...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/164801
More information about the llvm-commits mailing list