[llvm] [LV] Pre-commit test for #128062 (PR #164801)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 23 04:12:00 PDT 2025
https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/164801
In preparation for extending the work done by dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.
From 62babd1ba13ed0a314ea4d119b2cb5428eabbae4 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 23 Oct 2025 11:33:21 +0100
Subject: [PATCH] [LV] Pre-commit test for #128062
In preparation for extending the work done by dfa665f ([VPlan] Add
transformation to narrow interleave groups) to make the narrowing more
powerful, pre-commit a test case from #128062.
---
.../LoopVectorize/pointer-induction.ll | 356 ++++++++++++++++++
1 file changed, 356 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 5c62ca3ff3d01..9a655bba280e4 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -700,3 +700,359 @@ exit:
%result = add i64 %cast.ptr, %0
ret i64 %result
}
+
+; FIXME: Unprofitable vectorization (#128062): the stride-4 accesses at offsets 0, 2 and 3 are scalarized.
+; Should be smarter about handling interleave groups. NOTE(review): %val.1 is never stored -- confirm the gap is intended.
+define void @pr128062(ptr %dst.start, i64 %n, i8 %a) {
+; DEFAULT-LABEL: @pr128062(
+; DEFAULT-NEXT: entry:
+; DEFAULT-NEXT: [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; DEFAULT-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; DEFAULT-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; DEFAULT-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; DEFAULT-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT: vector.ph:
+; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; DEFAULT-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; DEFAULT-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
+; DEFAULT: vector.body:
+; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; DEFAULT-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; DEFAULT-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; DEFAULT-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; DEFAULT-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; DEFAULT-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; DEFAULT-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; DEFAULT-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; DEFAULT-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; DEFAULT-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; DEFAULT-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; DEFAULT-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; DEFAULT-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; DEFAULT-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; DEFAULT-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; DEFAULT-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; DEFAULT-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; DEFAULT-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; DEFAULT-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; DEFAULT-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; DEFAULT-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; DEFAULT-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; DEFAULT-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; DEFAULT-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; DEFAULT-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; DEFAULT-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; DEFAULT-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; DEFAULT-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; DEFAULT-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; DEFAULT-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
+; DEFAULT-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
+; DEFAULT-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
+; DEFAULT-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
+; DEFAULT-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; DEFAULT-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; DEFAULT-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; DEFAULT-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; DEFAULT-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; DEFAULT-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; DEFAULT-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; DEFAULT-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; DEFAULT-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; DEFAULT-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; DEFAULT-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; DEFAULT-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; DEFAULT-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; DEFAULT-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; DEFAULT-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; DEFAULT-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; DEFAULT-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; DEFAULT-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; DEFAULT-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; DEFAULT-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; DEFAULT-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
+; DEFAULT-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
+; DEFAULT-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
+; DEFAULT-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
+; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; DEFAULT: middle.block:
+; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; DEFAULT-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; DEFAULT: scalar.ph:
+; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; DEFAULT-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; DEFAULT-NEXT: br label [[LOOP:%.*]]
+; DEFAULT: loop:
+; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT: [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT: [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; DEFAULT-NEXT: [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; DEFAULT-NEXT: [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; DEFAULT-NEXT: [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; DEFAULT-NEXT: [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; DEFAULT-NEXT: [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; DEFAULT-NEXT: store i8 [[VAL_0]], ptr [[DST]], align 1
+; DEFAULT-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; DEFAULT-NEXT: [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; DEFAULT-NEXT: [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; DEFAULT-NEXT: [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; DEFAULT-NEXT: [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; DEFAULT-NEXT: [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; DEFAULT-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; DEFAULT-NEXT: [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT: [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; DEFAULT-NEXT: [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; DEFAULT-NEXT: [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; DEFAULT-NEXT: [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; DEFAULT-NEXT: store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; DEFAULT-NEXT: [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT: [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; DEFAULT-NEXT: [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; DEFAULT-NEXT: [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; DEFAULT-NEXT: [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; DEFAULT-NEXT: [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; DEFAULT-NEXT: store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], -4
+; DEFAULT-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; DEFAULT-NEXT: br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; DEFAULT: exit:
+; DEFAULT-NEXT: ret void
+;
+; STRIDED-LABEL: @pr128062(
+; STRIDED-NEXT: entry:
+; STRIDED-NEXT: [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; STRIDED-NEXT: [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; STRIDED-NEXT: [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; STRIDED-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; STRIDED-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; STRIDED-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; STRIDED-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; STRIDED: vector.ph:
+; STRIDED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; STRIDED-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; STRIDED-NEXT: [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; STRIDED-NEXT: [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; STRIDED-NEXT: [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; STRIDED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]]
+; STRIDED: vector.body:
+; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; STRIDED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; STRIDED-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; STRIDED-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; STRIDED-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; STRIDED-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; STRIDED-NEXT: [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT: [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT: [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT: [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT: [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; STRIDED-NEXT: [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; STRIDED-NEXT: [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; STRIDED-NEXT: [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; STRIDED-NEXT: [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; STRIDED-NEXT: [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; STRIDED-NEXT: [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; STRIDED-NEXT: [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; STRIDED-NEXT: [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; STRIDED-NEXT: [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; STRIDED-NEXT: [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; STRIDED-NEXT: [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; STRIDED-NEXT: store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT: store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT: store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT: store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT: [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; STRIDED-NEXT: [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; STRIDED-NEXT: [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; STRIDED-NEXT: [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; STRIDED-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; STRIDED-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; STRIDED-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; STRIDED-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; STRIDED-NEXT: [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; STRIDED-NEXT: [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; STRIDED-NEXT: [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; STRIDED-NEXT: [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; STRIDED-NEXT: [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; STRIDED-NEXT: [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; STRIDED-NEXT: [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; STRIDED-NEXT: [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; STRIDED-NEXT: [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; STRIDED-NEXT: [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; STRIDED-NEXT: [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; STRIDED-NEXT: [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; STRIDED-NEXT: store i8 [[TMP45]], ptr [[TMP28]], align 1
+; STRIDED-NEXT: store i8 [[TMP46]], ptr [[TMP29]], align 1
+; STRIDED-NEXT: store i8 [[TMP47]], ptr [[TMP30]], align 1
+; STRIDED-NEXT: store i8 [[TMP48]], ptr [[TMP31]], align 1
+; STRIDED-NEXT: [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; STRIDED-NEXT: [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; STRIDED-NEXT: [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; STRIDED-NEXT: [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; STRIDED-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; STRIDED-NEXT: [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; STRIDED-NEXT: [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; STRIDED-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; STRIDED-NEXT: [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; STRIDED-NEXT: [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; STRIDED-NEXT: [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; STRIDED-NEXT: [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; STRIDED-NEXT: [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; STRIDED-NEXT: [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT: [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; STRIDED-NEXT: [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; STRIDED-NEXT: [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; STRIDED-NEXT: [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; STRIDED-NEXT: [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; STRIDED-NEXT: [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; STRIDED-NEXT: [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; STRIDED-NEXT: store i8 [[TMP66]], ptr [[TMP49]], align 1
+; STRIDED-NEXT: store i8 [[TMP67]], ptr [[TMP50]], align 1
+; STRIDED-NEXT: store i8 [[TMP68]], ptr [[TMP51]], align 1
+; STRIDED-NEXT: store i8 [[TMP69]], ptr [[TMP52]], align 1
+; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT: [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; STRIDED: middle.block:
+; STRIDED-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; STRIDED-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; STRIDED: scalar.ph:
+; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; STRIDED-NEXT: [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; STRIDED-NEXT: br label [[LOOP:%.*]]
+; STRIDED: loop:
+; STRIDED-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; STRIDED-NEXT: [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; STRIDED-NEXT: [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; STRIDED-NEXT: [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; STRIDED-NEXT: [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; STRIDED-NEXT: [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; STRIDED-NEXT: [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; STRIDED-NEXT: [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; STRIDED-NEXT: [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; STRIDED-NEXT: store i8 [[VAL_0]], ptr [[DST]], align 1
+; STRIDED-NEXT: [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; STRIDED-NEXT: [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; STRIDED-NEXT: [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; STRIDED-NEXT: [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; STRIDED-NEXT: [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; STRIDED-NEXT: [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; STRIDED-NEXT: [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; STRIDED-NEXT: [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; STRIDED-NEXT: [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; STRIDED-NEXT: [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; STRIDED-NEXT: [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; STRIDED-NEXT: [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; STRIDED-NEXT: [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; STRIDED-NEXT: [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; STRIDED-NEXT: store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; STRIDED-NEXT: [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; STRIDED-NEXT: [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; STRIDED-NEXT: [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; STRIDED-NEXT: [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; STRIDED-NEXT: [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; STRIDED-NEXT: [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; STRIDED-NEXT: [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; STRIDED-NEXT: store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; STRIDED-NEXT: [[IV_NEXT]] = add i64 [[IV]], -4
+; STRIDED-NEXT: [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; STRIDED-NEXT: br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; STRIDED: exit:
+; STRIDED-NEXT: ret void
+;
+entry:
+ %iv.start = and i64 %n, -4
+ %a.ext = zext i8 %a to i16
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ %iv.start, %entry ]
+ %dst = phi ptr [ %dst.next, %loop ], [ %dst.start, %entry ]
+ %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4
+ %load.dst = load i8, ptr %dst, align 1
+ %dst.ext = zext i8 %load.dst to i16
+ %mul.dst.0 = mul nuw i16 %dst.ext, %a.ext
+ %udiv.0 = udiv i16 %mul.dst.0, 255
+ %trunc.0 = trunc nuw i16 %udiv.0 to i8
+ %val.0 = add i8 %a, %trunc.0
+ store i8 %val.0, ptr %dst, align 1
+ %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1
+ %load.dst.1 = load i8, ptr %gep.dst.1, align 1
+ %dst.1.ext = zext i8 %load.dst.1 to i16
+ %mul.dst.1 = mul nuw i16 %dst.1.ext, %a.ext
+ %udiv.1 = udiv i16 %mul.dst.1, 255
+ %trunc.1 = trunc nuw i16 %udiv.1 to i8
+ %val.1 = add i8 %a, %trunc.1
+ %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2
+ %load.dst.2 = load i8, ptr %gep.dst.2, align 1
+ %dst.2.ext = zext i8 %load.dst.2 to i16
+ %mul.dst.2 = mul nuw i16 %dst.2.ext, %a.ext
+ %udiv.2 = udiv i16 %mul.dst.2, 255
+ %trunc.2 = trunc nuw i16 %udiv.2 to i8
+ %val.2 = add i8 %a, %trunc.2
+ store i8 %val.2, ptr %gep.dst.2, align 1
+ %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3
+ %load.dst.3 = load i8, ptr %gep.dst.3, align 1
+ %dst.3.ext = zext i8 %load.dst.3 to i16
+ %mul.dst.3 = mul nuw i16 %dst.3.ext, %a.ext
+ %udiv.3 = udiv i16 %mul.dst.3, 255
+ %trunc.3 = trunc nuw i16 %udiv.3 to i8
+ %val.3 = add i8 %a, %trunc.3
+ store i8 %val.3, ptr %gep.dst.3, align 1
+ %iv.next = add i64 %iv, -4
+ %exit.cond = icmp eq i64 %iv.next, 0
+ br i1 %exit.cond, label %exit, label %loop
+
+exit:
+ ret void
+}
More information about the llvm-commits
mailing list