[llvm] [LV] Pre-commit test for #128062 (PR #164801)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 23 04:12:50 PDT 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>

In preparation for extending the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.

---

Patch is 24.22 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/164801.diff


1 File Affected:

- (modified) llvm/test/Transforms/LoopVectorize/pointer-induction.ll (+356) 


``````````diff
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 5c62ca3ff3d01..9a655bba280e4 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -700,3 +700,359 @@ exit:
   %result = add i64 %cast.ptr, %0
   ret i64 %result
 }
+
+; FIXME: Unprofitable vectorization.
+; Should be smarter about handling interleave groups.
+define void @pr128062(ptr %dst.start, i64 %n, i8 %a) {
+; DEFAULT-LABEL: @pr128062(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; DEFAULT-NEXT:    [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; DEFAULT-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; DEFAULT-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT:       vector.ph:
+; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; DEFAULT-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; DEFAULT-NEXT:    [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; DEFAULT-NEXT:    [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
+; DEFAULT:       vector.body:
+; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; DEFAULT-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; DEFAULT-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; DEFAULT-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; DEFAULT-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; DEFAULT-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; DEFAULT-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; DEFAULT-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; DEFAULT-NEXT:    [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT:    [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT:    [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; DEFAULT-NEXT:    [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; DEFAULT-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; DEFAULT-NEXT:    [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; DEFAULT-NEXT:    [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; DEFAULT-NEXT:    [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; DEFAULT-NEXT:    [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; DEFAULT-NEXT:    [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; DEFAULT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; DEFAULT-NEXT:    [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; DEFAULT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; DEFAULT-NEXT:    [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; DEFAULT-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; DEFAULT-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; DEFAULT-NEXT:    [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; DEFAULT-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; DEFAULT-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; DEFAULT-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; DEFAULT-NEXT:    [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; DEFAULT-NEXT:    [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; DEFAULT-NEXT:    [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; DEFAULT-NEXT:    [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; DEFAULT-NEXT:    [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; DEFAULT-NEXT:    [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; DEFAULT-NEXT:    [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; DEFAULT-NEXT:    [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; DEFAULT-NEXT:    [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; DEFAULT-NEXT:    [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; DEFAULT-NEXT:    [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; DEFAULT-NEXT:    [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP45]], ptr [[TMP28]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP46]], ptr [[TMP29]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP47]], ptr [[TMP30]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP48]], ptr [[TMP31]], align 1
+; DEFAULT-NEXT:    [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; DEFAULT-NEXT:    [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; DEFAULT-NEXT:    [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; DEFAULT-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; DEFAULT-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; DEFAULT-NEXT:    [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; DEFAULT-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; DEFAULT-NEXT:    [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; DEFAULT-NEXT:    [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; DEFAULT-NEXT:    [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; DEFAULT-NEXT:    [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; DEFAULT-NEXT:    [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; DEFAULT-NEXT:    [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; DEFAULT-NEXT:    [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; DEFAULT-NEXT:    [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; DEFAULT-NEXT:    [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; DEFAULT-NEXT:    [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; DEFAULT-NEXT:    [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; DEFAULT-NEXT:    [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP66]], ptr [[TMP49]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP67]], ptr [[TMP50]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP68]], ptr [[TMP51]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP69]], ptr [[TMP52]], align 1
+; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT:    [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT:    br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; DEFAULT:       middle.block:
+; DEFAULT-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; DEFAULT-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; DEFAULT:       scalar.ph:
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; DEFAULT-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; DEFAULT-NEXT:    br label [[LOOP:%.*]]
+; DEFAULT:       loop:
+; DEFAULT-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT:    [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT:    [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; DEFAULT-NEXT:    [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; DEFAULT-NEXT:    [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; DEFAULT-NEXT:    [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; DEFAULT-NEXT:    [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; DEFAULT-NEXT:    store i8 [[VAL_0]], ptr [[DST]], align 1
+; DEFAULT-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; DEFAULT-NEXT:    [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; DEFAULT-NEXT:    [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; DEFAULT-NEXT:    [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; DEFAULT-NEXT:    [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; DEFAULT-NEXT:    [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; DEFAULT-NEXT:    [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT:    [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; DEFAULT-NEXT:    [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; DEFAULT-NEXT:    [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; DEFAULT-NEXT:    store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT:    [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; DEFAULT-NEXT:    [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT:    [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; DEFAULT-NEXT:    [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; DEFAULT-NEXT:    [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; DEFAULT-NEXT:    store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT:    [[IV_NEXT]] = add i64 [[IV]], -4
+; DEFAULT-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; DEFAULT-NEXT:    br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; DEFAULT:       exit:
+; DEFAULT-NEXT:    ret void
+;
+; STRIDED-LABEL: @pr128062(
+; STRIDED-NEXT:  entry:
+; STRIDED-NEXT:    [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; STRIDED-NEXT:    [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; STRIDED-NEXT:    [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; STRIDED-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; STRIDED-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; STRIDED:       vector.ph:
+; STRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; STRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; STRIDED-NEXT:    [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; STRIDED-NEXT:    [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; STRIDED:       vector.body:
+; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; STRIDED-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; STRIDED-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; STRIDED-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; STRIDED-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; STRIDED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; STRIDED-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; STRIDED-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; STRIDED-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT:    [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT:    [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; STRIDED-NEXT:    [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; STRIDED-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; STRIDED-NEXT:    [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; STRIDED-NEXT:    [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT:    [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; STRIDED-NEXT:    [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; STRIDED-NEXT:    [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; STRIDED-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; STRIDED-NEXT:    [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; STRIDED-NEXT:    store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT:    store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT:    store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT:    store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; STRIDED-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; STRIDED-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; STRIDED-NEXT:    [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; STRIDED-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; STRIDED-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; STRIDED-NEXT:    [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; STRIDED-NEXT:    [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; STRIDED-NEXT:    [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; STRIDED-NEXT:    [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT:    [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; STRIDED-NEXT:    [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; STRIDED-NEXT:    [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; STRIDED-NEXT:    [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; STRIDED-NEXT:    [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; STRIDED-NEXT:    store i8 [[TMP45]], ptr [[TMP28]], align 1
+; STRIDED-NEXT:    store i8 [[TMP46]], ptr [[TMP29]], align 1
+; STRIDED-NEXT:    store i8 [[TMP47]], ptr [[TMP30]], align 1
+; STRIDED-NEXT:    store i8 [[TMP48]], ptr [[TMP31]], align 1
+; STRIDED-NEXT:    [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; STRIDED-NEXT:    [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; STRIDED-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; STRIDED-NEXT:    [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; STRIDED-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; STRIDED-NEXT:    [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; STRIDED-NEXT:    [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; STRIDED-NEXT:    [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; STRIDED-NEXT:    [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; STRIDED-NEXT:    [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; STRIDED-NEXT:    [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT:    [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; STRIDED-NEXT:    [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; STRIDED-NEXT:    [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; STRIDED-NEXT:    [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; STRIDED-NEXT:    [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; STRIDED-NEXT:    [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; STRIDED-NEXT:    [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; STRIDED-NEXT:    store i8 [[TMP66]], ptr [[TMP49]], align 1
+; STRIDED-NEXT:    store i8 [[TMP67]], ptr [[TMP50]], align 1
+; STRIDED-NEXT:    store i8 [[TMP68]], ptr [[TMP51]], align 1
+; STRIDED-NEXT:    store i8 [[TMP69]], ptr [[TMP52]], align 1
+; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT:    [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT:    br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; STRIDED:       middle.block:
+; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; STRIDED:       scalar.ph:
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; STRIDED-NEXT:    br label [[LOOP:%.*]]
+; STRIDED:       loop:
+; STRIDED-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; STRIDED...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/164801


More information about the llvm-commits mailing list