[llvm] [LV] Pre-commit test for #128062 (PR #164801)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 23 04:12:00 PDT 2025


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/164801

In preparation for extending the work done in dfa665f ([VPlan] Add transformation to narrow interleave groups) to make the narrowing more powerful, pre-commit a test case from #128062.

>From 62babd1ba13ed0a314ea4d119b2cb5428eabbae4 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Thu, 23 Oct 2025 11:33:21 +0100
Subject: [PATCH] [LV] Pre-commit test for #128062

In preparation for extending the work done in dfa665f ([VPlan] Add
transformation to narrow interleave groups) to make the narrowing more
powerful, pre-commit a test case from #128062.
---
 .../LoopVectorize/pointer-induction.ll        | 356 ++++++++++++++++++
 1 file changed, 356 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
index 5c62ca3ff3d01..9a655bba280e4 100644
--- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll
@@ -700,3 +700,359 @@ exit:
   %result = add i64 %cast.ptr, %0
   ret i64 %result
 }
+
+; FIXME: Vectorizing this loop is unprofitable: every load and store is scalarized.
+; The interleave groups should be handled more cleverly (narrowed), see #128062.
+define void @pr128062(ptr %dst.start, i64 %n, i8 %a) { ; test input from #128062: updates bytes 0, 2 and 3 of each 4-byte group in place
+; DEFAULT-LABEL: @pr128062(
+; DEFAULT-NEXT:  entry:
+; DEFAULT-NEXT:    [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; DEFAULT-NEXT:    [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; DEFAULT-NEXT:    [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; DEFAULT-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; DEFAULT-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; DEFAULT-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; DEFAULT-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; DEFAULT:       vector.ph:
+; DEFAULT-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; DEFAULT-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; DEFAULT-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; DEFAULT-NEXT:    [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; DEFAULT-NEXT:    [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; DEFAULT-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; DEFAULT-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; DEFAULT-NEXT:    br label [[VECTOR_BODY:%.*]]
+; DEFAULT:       vector.body:
+; DEFAULT-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; DEFAULT-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; DEFAULT-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; DEFAULT-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; DEFAULT-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; DEFAULT-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; DEFAULT-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; DEFAULT-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; DEFAULT-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; DEFAULT-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; DEFAULT-NEXT:    [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT:    [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT:    [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT:    [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; DEFAULT-NEXT:    [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; DEFAULT-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; DEFAULT-NEXT:    [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; DEFAULT-NEXT:    [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; DEFAULT-NEXT:    [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; DEFAULT-NEXT:    [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; DEFAULT-NEXT:    [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; DEFAULT-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; DEFAULT-NEXT:    [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; DEFAULT-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; DEFAULT-NEXT:    [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; DEFAULT-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; DEFAULT-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; DEFAULT-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; DEFAULT-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; DEFAULT-NEXT:    [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; DEFAULT-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; DEFAULT-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; DEFAULT-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; DEFAULT-NEXT:    [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; DEFAULT-NEXT:    [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; DEFAULT-NEXT:    [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; DEFAULT-NEXT:    [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; DEFAULT-NEXT:    [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; DEFAULT-NEXT:    [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; DEFAULT-NEXT:    [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; DEFAULT-NEXT:    [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; DEFAULT-NEXT:    [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; DEFAULT-NEXT:    [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; DEFAULT-NEXT:    [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; DEFAULT-NEXT:    [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP45]], ptr [[TMP28]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP46]], ptr [[TMP29]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP47]], ptr [[TMP30]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP48]], ptr [[TMP31]], align 1
+; DEFAULT-NEXT:    [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; DEFAULT-NEXT:    [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; DEFAULT-NEXT:    [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; DEFAULT-NEXT:    [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; DEFAULT-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; DEFAULT-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; DEFAULT-NEXT:    [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; DEFAULT-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; DEFAULT-NEXT:    [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; DEFAULT-NEXT:    [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; DEFAULT-NEXT:    [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; DEFAULT-NEXT:    [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; DEFAULT-NEXT:    [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; DEFAULT-NEXT:    [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; DEFAULT-NEXT:    [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; DEFAULT-NEXT:    [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; DEFAULT-NEXT:    [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; DEFAULT-NEXT:    [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; DEFAULT-NEXT:    [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; DEFAULT-NEXT:    [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; DEFAULT-NEXT:    [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; DEFAULT-NEXT:    store i8 [[TMP66]], ptr [[TMP49]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP67]], ptr [[TMP50]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP68]], ptr [[TMP51]], align 1
+; DEFAULT-NEXT:    store i8 [[TMP69]], ptr [[TMP52]], align 1
+; DEFAULT-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; DEFAULT-NEXT:    [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; DEFAULT-NEXT:    br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; DEFAULT:       middle.block:
+; DEFAULT-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; DEFAULT-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; DEFAULT:       scalar.ph:
+; DEFAULT-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; DEFAULT-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; DEFAULT-NEXT:    br label [[LOOP:%.*]]
+; DEFAULT:       loop:
+; DEFAULT-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT:    [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; DEFAULT-NEXT:    [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; DEFAULT-NEXT:    [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; DEFAULT-NEXT:    [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; DEFAULT-NEXT:    [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; DEFAULT-NEXT:    [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; DEFAULT-NEXT:    store i8 [[VAL_0]], ptr [[DST]], align 1
+; DEFAULT-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; DEFAULT-NEXT:    [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; DEFAULT-NEXT:    [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; DEFAULT-NEXT:    [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; DEFAULT-NEXT:    [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; DEFAULT-NEXT:    [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; DEFAULT-NEXT:    [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT:    [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; DEFAULT-NEXT:    [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; DEFAULT-NEXT:    [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; DEFAULT-NEXT:    store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; DEFAULT-NEXT:    [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; DEFAULT-NEXT:    [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT:    [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; DEFAULT-NEXT:    [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; DEFAULT-NEXT:    [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; DEFAULT-NEXT:    [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; DEFAULT-NEXT:    [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; DEFAULT-NEXT:    store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; DEFAULT-NEXT:    [[IV_NEXT]] = add i64 [[IV]], -4
+; DEFAULT-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; DEFAULT-NEXT:    br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; DEFAULT:       exit:
+; DEFAULT-NEXT:    ret void
+;
+; STRIDED-LABEL: @pr128062(
+; STRIDED-NEXT:  entry:
+; STRIDED-NEXT:    [[IV_START:%.*]] = and i64 [[N:%.*]], -4
+; STRIDED-NEXT:    [[A_EXT:%.*]] = zext i8 [[A:%.*]] to i16
+; STRIDED-NEXT:    [[TMP0:%.*]] = add i64 [[IV_START]], -4
+; STRIDED-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; STRIDED-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; STRIDED-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
+; STRIDED-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; STRIDED:       vector.ph:
+; STRIDED-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
+; STRIDED-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
+; STRIDED-NEXT:    [[TMP3:%.*]] = mul i64 [[N_VEC]], -4
+; STRIDED-NEXT:    [[TMP4:%.*]] = add i64 [[IV_START]], [[TMP3]]
+; STRIDED-NEXT:    [[TMP5:%.*]] = mul i64 [[N_VEC]], 4
+; STRIDED-NEXT:    [[TMP6:%.*]] = getelementptr i8, ptr [[DST_START:%.*]], i64 [[TMP5]]
+; STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[A_EXT]], i64 0
+; STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[A]], i64 0
+; STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
+; STRIDED:       vector.body:
+; STRIDED-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; STRIDED-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; STRIDED-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
+; STRIDED-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
+; STRIDED-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 8
+; STRIDED-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 12
+; STRIDED-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP7]]
+; STRIDED-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP8]]
+; STRIDED-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP9]]
+; STRIDED-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST_START]], i64 [[TMP10]]
+; STRIDED-NEXT:    [[TMP11:%.*]] = load i8, ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT:    [[TMP12:%.*]] = load i8, ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT:    [[TMP13:%.*]] = load i8, ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT:    [[TMP14:%.*]] = load i8, ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT:    [[TMP15:%.*]] = insertelement <4 x i8> poison, i8 [[TMP11]], i32 0
+; STRIDED-NEXT:    [[TMP16:%.*]] = insertelement <4 x i8> [[TMP15]], i8 [[TMP12]], i32 1
+; STRIDED-NEXT:    [[TMP17:%.*]] = insertelement <4 x i8> [[TMP16]], i8 [[TMP13]], i32 2
+; STRIDED-NEXT:    [[TMP18:%.*]] = insertelement <4 x i8> [[TMP17]], i8 [[TMP14]], i32 3
+; STRIDED-NEXT:    [[TMP19:%.*]] = zext <4 x i8> [[TMP18]] to <4 x i16>
+; STRIDED-NEXT:    [[TMP20:%.*]] = mul nuw <4 x i16> [[TMP19]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT:    [[TMP21:%.*]] = udiv <4 x i16> [[TMP20]], splat (i16 255)
+; STRIDED-NEXT:    [[TMP22:%.*]] = trunc nuw <4 x i16> [[TMP21]] to <4 x i8>
+; STRIDED-NEXT:    [[TMP23:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP22]]
+; STRIDED-NEXT:    [[TMP24:%.*]] = extractelement <4 x i8> [[TMP23]], i32 0
+; STRIDED-NEXT:    [[TMP25:%.*]] = extractelement <4 x i8> [[TMP23]], i32 1
+; STRIDED-NEXT:    [[TMP26:%.*]] = extractelement <4 x i8> [[TMP23]], i32 2
+; STRIDED-NEXT:    [[TMP27:%.*]] = extractelement <4 x i8> [[TMP23]], i32 3
+; STRIDED-NEXT:    store i8 [[TMP24]], ptr [[NEXT_GEP]], align 1
+; STRIDED-NEXT:    store i8 [[TMP25]], ptr [[NEXT_GEP3]], align 1
+; STRIDED-NEXT:    store i8 [[TMP26]], ptr [[NEXT_GEP4]], align 1
+; STRIDED-NEXT:    store i8 [[TMP27]], ptr [[NEXT_GEP5]], align 1
+; STRIDED-NEXT:    [[TMP28:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 2
+; STRIDED-NEXT:    [[TMP29:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 2
+; STRIDED-NEXT:    [[TMP30:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 2
+; STRIDED-NEXT:    [[TMP31:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 2
+; STRIDED-NEXT:    [[TMP32:%.*]] = load i8, ptr [[TMP28]], align 1
+; STRIDED-NEXT:    [[TMP33:%.*]] = load i8, ptr [[TMP29]], align 1
+; STRIDED-NEXT:    [[TMP34:%.*]] = load i8, ptr [[TMP30]], align 1
+; STRIDED-NEXT:    [[TMP35:%.*]] = load i8, ptr [[TMP31]], align 1
+; STRIDED-NEXT:    [[TMP36:%.*]] = insertelement <4 x i8> poison, i8 [[TMP32]], i32 0
+; STRIDED-NEXT:    [[TMP37:%.*]] = insertelement <4 x i8> [[TMP36]], i8 [[TMP33]], i32 1
+; STRIDED-NEXT:    [[TMP38:%.*]] = insertelement <4 x i8> [[TMP37]], i8 [[TMP34]], i32 2
+; STRIDED-NEXT:    [[TMP39:%.*]] = insertelement <4 x i8> [[TMP38]], i8 [[TMP35]], i32 3
+; STRIDED-NEXT:    [[TMP40:%.*]] = zext <4 x i8> [[TMP39]] to <4 x i16>
+; STRIDED-NEXT:    [[TMP41:%.*]] = mul nuw <4 x i16> [[TMP40]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT:    [[TMP42:%.*]] = udiv <4 x i16> [[TMP41]], splat (i16 255)
+; STRIDED-NEXT:    [[TMP43:%.*]] = trunc nuw <4 x i16> [[TMP42]] to <4 x i8>
+; STRIDED-NEXT:    [[TMP44:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP43]]
+; STRIDED-NEXT:    [[TMP45:%.*]] = extractelement <4 x i8> [[TMP44]], i32 0
+; STRIDED-NEXT:    [[TMP46:%.*]] = extractelement <4 x i8> [[TMP44]], i32 1
+; STRIDED-NEXT:    [[TMP47:%.*]] = extractelement <4 x i8> [[TMP44]], i32 2
+; STRIDED-NEXT:    [[TMP48:%.*]] = extractelement <4 x i8> [[TMP44]], i32 3
+; STRIDED-NEXT:    store i8 [[TMP45]], ptr [[TMP28]], align 1
+; STRIDED-NEXT:    store i8 [[TMP46]], ptr [[TMP29]], align 1
+; STRIDED-NEXT:    store i8 [[TMP47]], ptr [[TMP30]], align 1
+; STRIDED-NEXT:    store i8 [[TMP48]], ptr [[TMP31]], align 1
+; STRIDED-NEXT:    [[TMP49:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP]], i64 3
+; STRIDED-NEXT:    [[TMP50:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP3]], i64 3
+; STRIDED-NEXT:    [[TMP51:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP4]], i64 3
+; STRIDED-NEXT:    [[TMP52:%.*]] = getelementptr inbounds nuw i8, ptr [[NEXT_GEP5]], i64 3
+; STRIDED-NEXT:    [[TMP53:%.*]] = load i8, ptr [[TMP49]], align 1
+; STRIDED-NEXT:    [[TMP54:%.*]] = load i8, ptr [[TMP50]], align 1
+; STRIDED-NEXT:    [[TMP55:%.*]] = load i8, ptr [[TMP51]], align 1
+; STRIDED-NEXT:    [[TMP56:%.*]] = load i8, ptr [[TMP52]], align 1
+; STRIDED-NEXT:    [[TMP57:%.*]] = insertelement <4 x i8> poison, i8 [[TMP53]], i32 0
+; STRIDED-NEXT:    [[TMP58:%.*]] = insertelement <4 x i8> [[TMP57]], i8 [[TMP54]], i32 1
+; STRIDED-NEXT:    [[TMP59:%.*]] = insertelement <4 x i8> [[TMP58]], i8 [[TMP55]], i32 2
+; STRIDED-NEXT:    [[TMP60:%.*]] = insertelement <4 x i8> [[TMP59]], i8 [[TMP56]], i32 3
+; STRIDED-NEXT:    [[TMP61:%.*]] = zext <4 x i8> [[TMP60]] to <4 x i16>
+; STRIDED-NEXT:    [[TMP62:%.*]] = mul nuw <4 x i16> [[TMP61]], [[BROADCAST_SPLAT]]
+; STRIDED-NEXT:    [[TMP63:%.*]] = udiv <4 x i16> [[TMP62]], splat (i16 255)
+; STRIDED-NEXT:    [[TMP64:%.*]] = trunc nuw <4 x i16> [[TMP63]] to <4 x i8>
+; STRIDED-NEXT:    [[TMP65:%.*]] = add <4 x i8> [[BROADCAST_SPLAT2]], [[TMP64]]
+; STRIDED-NEXT:    [[TMP66:%.*]] = extractelement <4 x i8> [[TMP65]], i32 0
+; STRIDED-NEXT:    [[TMP67:%.*]] = extractelement <4 x i8> [[TMP65]], i32 1
+; STRIDED-NEXT:    [[TMP68:%.*]] = extractelement <4 x i8> [[TMP65]], i32 2
+; STRIDED-NEXT:    [[TMP69:%.*]] = extractelement <4 x i8> [[TMP65]], i32 3
+; STRIDED-NEXT:    store i8 [[TMP66]], ptr [[TMP49]], align 1
+; STRIDED-NEXT:    store i8 [[TMP67]], ptr [[TMP50]], align 1
+; STRIDED-NEXT:    store i8 [[TMP68]], ptr [[TMP51]], align 1
+; STRIDED-NEXT:    store i8 [[TMP69]], ptr [[TMP52]], align 1
+; STRIDED-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; STRIDED-NEXT:    [[TMP70:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; STRIDED-NEXT:    br i1 [[TMP70]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; STRIDED:       middle.block:
+; STRIDED-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
+; STRIDED-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
+; STRIDED:       scalar.ph:
+; STRIDED-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ [[IV_START]], [[ENTRY:%.*]] ]
+; STRIDED-NEXT:    [[BC_RESUME_VAL6:%.*]] = phi ptr [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[DST_START]], [[ENTRY]] ]
+; STRIDED-NEXT:    br label [[LOOP:%.*]]
+; STRIDED:       loop:
+; STRIDED-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; STRIDED-NEXT:    [[DST:%.*]] = phi ptr [ [[DST_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL6]], [[SCALAR_PH]] ]
+; STRIDED-NEXT:    [[DST_NEXT]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 4
+; STRIDED-NEXT:    [[LOAD_DST:%.*]] = load i8, ptr [[DST]], align 1
+; STRIDED-NEXT:    [[DST_EXT:%.*]] = zext i8 [[LOAD_DST]] to i16
+; STRIDED-NEXT:    [[MUL_DST_0:%.*]] = mul nuw i16 [[DST_EXT]], [[A_EXT]]
+; STRIDED-NEXT:    [[UDIV_0:%.*]] = udiv i16 [[MUL_DST_0]], 255
+; STRIDED-NEXT:    [[TRUNC_0:%.*]] = trunc nuw i16 [[UDIV_0]] to i8
+; STRIDED-NEXT:    [[VAL_0:%.*]] = add i8 [[A]], [[TRUNC_0]]
+; STRIDED-NEXT:    store i8 [[VAL_0]], ptr [[DST]], align 1
+; STRIDED-NEXT:    [[GEP_DST_1:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 1
+; STRIDED-NEXT:    [[LOAD_DST_1:%.*]] = load i8, ptr [[GEP_DST_1]], align 1
+; STRIDED-NEXT:    [[DST_1_EXT:%.*]] = zext i8 [[LOAD_DST_1]] to i16
+; STRIDED-NEXT:    [[MUL_DST_1:%.*]] = mul nuw i16 [[DST_1_EXT]], [[A_EXT]]
+; STRIDED-NEXT:    [[UDIV_1:%.*]] = udiv i16 [[MUL_DST_1]], 255
+; STRIDED-NEXT:    [[TRUNC_1:%.*]] = trunc nuw i16 [[UDIV_1]] to i8
+; STRIDED-NEXT:    [[VAL_1:%.*]] = add i8 [[A]], [[TRUNC_1]]
+; STRIDED-NEXT:    [[GEP_DST_2:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 2
+; STRIDED-NEXT:    [[LOAD_DST_2:%.*]] = load i8, ptr [[GEP_DST_2]], align 1
+; STRIDED-NEXT:    [[DST_2_EXT:%.*]] = zext i8 [[LOAD_DST_2]] to i16
+; STRIDED-NEXT:    [[MUL_DST_2:%.*]] = mul nuw i16 [[DST_2_EXT]], [[A_EXT]]
+; STRIDED-NEXT:    [[UDIV_2:%.*]] = udiv i16 [[MUL_DST_2]], 255
+; STRIDED-NEXT:    [[TRUNC_2:%.*]] = trunc nuw i16 [[UDIV_2]] to i8
+; STRIDED-NEXT:    [[VAL_2:%.*]] = add i8 [[A]], [[TRUNC_2]]
+; STRIDED-NEXT:    store i8 [[VAL_2]], ptr [[GEP_DST_2]], align 1
+; STRIDED-NEXT:    [[GEP_DST_3:%.*]] = getelementptr inbounds nuw i8, ptr [[DST]], i64 3
+; STRIDED-NEXT:    [[LOAD_DST_3:%.*]] = load i8, ptr [[GEP_DST_3]], align 1
+; STRIDED-NEXT:    [[DST_3_EXT:%.*]] = zext i8 [[LOAD_DST_3]] to i16
+; STRIDED-NEXT:    [[MUL_DST_3:%.*]] = mul nuw i16 [[DST_3_EXT]], [[A_EXT]]
+; STRIDED-NEXT:    [[UDIV_3:%.*]] = udiv i16 [[MUL_DST_3]], 255
+; STRIDED-NEXT:    [[TRUNC_3:%.*]] = trunc nuw i16 [[UDIV_3]] to i8
+; STRIDED-NEXT:    [[VAL_3:%.*]] = add i8 [[A]], [[TRUNC_3]]
+; STRIDED-NEXT:    store i8 [[VAL_3]], ptr [[GEP_DST_3]], align 1
+; STRIDED-NEXT:    [[IV_NEXT]] = add i64 [[IV]], -4
+; STRIDED-NEXT:    [[EXIT_COND:%.*]] = icmp eq i64 [[IV_NEXT]], 0
+; STRIDED-NEXT:    br i1 [[EXIT_COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
+; STRIDED:       exit:
+; STRIDED-NEXT:    ret void
+;
+entry:
+  %iv.start = and i64 %n, -4 ; %n with its two low bits cleared, i.e. rounded down to a multiple of 4
+  %a.ext = zext i8 %a to i16 ; %a widened once, outside the loop, for the 16-bit multiplies below
+  br label %loop
+
+loop:
+  %iv = phi i64 [ %iv.next, %loop ], [ %iv.start, %entry ] ; counter, starts at %iv.start and steps down by 4
+  %dst = phi ptr [ %dst.next, %loop ], [ %dst.start, %entry ] ; base of the current 4-byte group
+  %dst.next = getelementptr inbounds nuw i8, ptr %dst, i64 4 ; next group starts 4 bytes further on
+  %load.dst = load i8, ptr %dst, align 1 ; byte 0: new = %a + trunc((zext(old) * %a.ext) / 255)
+  %dst.ext = zext i8 %load.dst to i16
+  %mul.dst.0 = mul nuw i16 %dst.ext, %a.ext
+  %udiv.0 = udiv i16 %mul.dst.0, 255
+  %trunc.0 = trunc nuw i16 %udiv.0 to i8
+  %val.0 = add i8 %a, %trunc.0
+  store i8 %val.0, ptr %dst, align 1 ; byte 0 is written back in place
+  %gep.dst.1 = getelementptr inbounds nuw i8, ptr %dst, i64 1 ; byte 1: same computation as byte 0
+  %load.dst.1 = load i8, ptr %gep.dst.1, align 1
+  %dst.1.ext = zext i8 %load.dst.1 to i16
+  %mul.dst.1 = mul nuw i16 %dst.1.ext, %a.ext
+  %udiv.1 = udiv i16 %mul.dst.1, 255
+  %trunc.1 = trunc nuw i16 %udiv.1 to i8
+  %val.1 = add i8 %a, %trunc.1 ; NOTE(review): %val.1 is never stored in this function, so the byte-1 chain is unused; confirm whether a store to %gep.dst.1 was intended
+  %gep.dst.2 = getelementptr inbounds nuw i8, ptr %dst, i64 2 ; byte 2: same computation as byte 0
+  %load.dst.2 = load i8, ptr %gep.dst.2, align 1
+  %dst.2.ext = zext i8 %load.dst.2 to i16
+  %mul.dst.2 = mul nuw i16 %dst.2.ext, %a.ext
+  %udiv.2 = udiv i16 %mul.dst.2, 255
+  %trunc.2 = trunc nuw i16 %udiv.2 to i8
+  %val.2 = add i8 %a, %trunc.2
+  store i8 %val.2, ptr %gep.dst.2, align 1 ; byte 2 is written back in place
+  %gep.dst.3 = getelementptr inbounds nuw i8, ptr %dst, i64 3 ; byte 3: same computation as byte 0
+  %load.dst.3 = load i8, ptr %gep.dst.3, align 1
+  %dst.3.ext = zext i8 %load.dst.3 to i16
+  %mul.dst.3 = mul nuw i16 %dst.3.ext, %a.ext
+  %udiv.3 = udiv i16 %mul.dst.3, 255
+  %trunc.3 = trunc nuw i16 %udiv.3 to i8
+  %val.3 = add i8 %a, %trunc.3
+  store i8 %val.3, ptr %gep.dst.3, align 1 ; byte 3 is written back in place
+  %iv.next = add i64 %iv, -4 ; step the counter down by 4
+  %exit.cond = icmp eq i64 %iv.next, 0 ; the loop exits only when the counter reaches exactly 0
+  br i1 %exit.cond, label %exit, label %loop
+
+exit:
+  ret void
+}



More information about the llvm-commits mailing list