[llvm] [VPlan] Explicitly check for wide IV in optimizeFindIVReductions. (PR #184437)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 3 12:50:27 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
getSCEVExprForVPValue can look through some bin-ops and still result in an AddRec, even if it is not a direct VPWidenIntOrFpInductionRecipe.
This can lead to incorrect transforms, e.g. if iv+1 is selected.
Fix this by explicitly checking for VPWidenIntOrFpInductionRecipe.
---
Patch is 110.38 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/184437.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+4-2)
- (modified) llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll (+9-7)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+135-675)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll (+28-97)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a24a483ab5e32..f32e4bf2aa27d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -5716,10 +5716,12 @@ void VPlanTransforms::optimizeFindIVReductions(VPlan &Plan,
assert(is_contained(CondSelect->getDefiningRecipe()->operands(), PhiR));
VPValue *IV = TrueVal == PhiR ? FalseVal : TrueVal;
+ if (!isa<VPWidenIntOrFpInductionRecipe>(IV))
+ continue;
const SCEV *IVSCEV = vputils::getSCEVExprForVPValue(IV, PSE, &L);
const SCEV *Step;
- if (!match(IVSCEV, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step))))
- continue;
+ bool Matched = match(IVSCEV, m_scev_AffineAddRec(m_SCEV(), m_SCEV(Step)));
+ assert(Matched && "Wide IV must be SCEV-able");
// Determine direction from SCEV step.
if (!SE.isKnownNonZero(Step))
diff --git a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll
index 0bbc965098766..cb531551b0637 100644
--- a/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll
+++ b/llvm/test/Transforms/LoopVectorize/find-last-iv-sinkable-expr.ll
@@ -309,7 +309,8 @@ define i64 @findlast_non_canonical_iv_with_expr(ptr %a, i64 %n) {
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND1:%.*]] = phi <4 x i64> [ <i64 10, i64 12, i64 14, i64 16>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP23:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ splat (i64 -1), %[[VECTOR_PH]] ], [ [[TMP28:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP23:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP27:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[INDEX]], 2
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 10, [[TMP5]]
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 0
@@ -330,20 +331,21 @@ define i64 @findlast_non_canonical_iv_with_expr(ptr %a, i64 %n) {
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i64> [[TMP20]], i64 [[TMP17]], i32 3
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq <4 x i64> [[TMP21]], splat (i64 42)
; CHECK-NEXT: [[VEC_IND:%.*]] = add <4 x i64> [[VEC_IND1]], splat (i64 100)
-; CHECK-NEXT: [[TMP23]] = select <4 x i1> [[TMP22]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
+; CHECK-NEXT: [[TMP25:%.*]] = freeze <4 x i1> [[TMP22]]
+; CHECK-NEXT: [[TMP26:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP25]])
+; CHECK-NEXT: [[TMP27]] = select i1 [[TMP26]], <4 x i1> [[TMP22]], <4 x i1> [[TMP23]]
+; CHECK-NEXT: [[TMP28]] = select i1 [[TMP26]], <4 x i64> [[VEC_IND]], <4 x i64> [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add nuw nsw <4 x i64> [[VEC_IND1]], splat (i64 8)
; CHECK-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP24]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> [[TMP23]])
-; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i64 [[TMP27]], 0
-; CHECK-NEXT: [[TMP28:%.*]] = select i1 [[TMP26]], i64 [[TMP27]], i64 -1
+; CHECK-NEXT: [[TMP30:%.*]] = call i64 @llvm.experimental.vector.extract.last.active.v4i64(<4 x i64> [[TMP28]], <4 x i1> [[TMP27]], i64 -1)
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[DONE:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP4]], %[[MIDDLE_BLOCK]] ], [ 10, %[[ENTRY]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP28]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP30]], %[[MIDDLE_BLOCK]] ], [ -1, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -357,7 +359,7 @@ define i64 @findlast_non_canonical_iv_with_expr(ptr %a, i64 %n) {
; CHECK-NEXT: [[EXIT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXIT]], label %[[DONE]], label %[[LOOP]], !llvm.loop [[LOOP11:![0-9]+]]
; CHECK: [[DONE]]:
-; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i64 [ [[SEL]], %[[LOOP]] ], [ [[TMP28]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i64 [ [[SEL]], %[[LOOP]] ], [ [[TMP30]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i64 [[SEL_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
index dd3ad4d01b465..d6e95e1aeab4a 100644
--- a/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll
@@ -163,7 +163,8 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC1VF4: [[VECTOR_BODY]]:
; IC1VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
+; IC1VF4-NEXT: [[TMP1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
; IC1VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; IC1VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
; IC1VF4-NEXT: [[TMP0:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[OFFSET_IDX]]
@@ -172,15 +173,16 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC1VF4-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
; IC1VF4-NEXT: [[TMP3:%.*]] = icmp ugt <4 x i16> [[REVERSE]], [[BROADCAST_SPLAT]]
; IC1VF4-NEXT: [[TMP4:%.*]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -1)
-; IC1VF4-NEXT: [[TMP5]] = select <4 x i1> [[TMP3]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
+; IC1VF4-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP3]]
+; IC1VF4-NEXT: [[TMP9:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
+; IC1VF4-NEXT: [[TMP7]] = select i1 [[TMP9]], <4 x i1> [[TMP3]], <4 x i1> [[TMP1]]
+; IC1VF4-NEXT: [[TMP8]] = select i1 [[TMP9]], <4 x i16> [[TMP4]], <4 x i16> [[VEC_PHI]]
; IC1VF4-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; IC1VF4-NEXT: [[VEC_IND_NEXT]] = add nsw <4 x i16> [[VEC_IND]], splat (i16 -4)
; IC1VF4-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12
; IC1VF4-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; IC1VF4: [[MIDDLE_BLOCK]]:
-; IC1VF4-NEXT: [[TMP7:%.*]] = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> [[TMP5]])
-; IC1VF4-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne i16 [[TMP7]], 32767
-; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = select i1 [[RDX_SELECT_CMP]], i16 [[TMP7]], i16 0
+; IC1VF4-NEXT: [[RDX_SELECT:%.*]] = call i16 @llvm.experimental.vector.extract.last.active.v4i16(<4 x i16> [[TMP8]], <4 x i1> [[TMP7]], i16 0)
; IC1VF4-NEXT: br label %[[EXIT:.*]]
; IC1VF4: [[EXIT]]:
; IC1VF4-NEXT: ret i16 [[RDX_SELECT]]
@@ -194,27 +196,16 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC4VF4-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
; IC4VF4-NEXT: br label %[[VECTOR_BODY:.*]]
; IC4VF4: [[VECTOR_BODY]]:
-; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44:.*]] ]
-; IC4VF4-NEXT: [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP108:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP109:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP110:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i16> [ splat (i16 32767), %[[VECTOR_PH]] ], [ [[TMP111:%.*]], %[[PRED_LOAD_CONTINUE44]] ]
-; IC4VF4-NEXT: [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], splat (i16 -4)
-; IC4VF4-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i16> [[STEP_ADD]], splat (i16 -4)
-; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i16> [[STEP_ADD_2]], splat (i16 -4)
+; IC4VF4-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE20:.*]] ]
+; IC4VF4-NEXT: [[STEP_ADD_3:%.*]] = phi <4 x i16> [ <i16 12, i16 11, i16 10, i16 9>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE20]] ]
+; IC4VF4-NEXT: [[VEC_PHI:%.*]] = phi <4 x i16> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP32:%.*]], %[[PRED_LOAD_CONTINUE20]] ]
+; IC4VF4-NEXT: [[TMP1:%.*]] = phi <4 x i1> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP31:%.*]], %[[PRED_LOAD_CONTINUE20]] ]
; IC4VF4-NEXT: [[DOTCAST:%.*]] = trunc i32 [[INDEX]] to i16
; IC4VF4-NEXT: [[OFFSET_IDX:%.*]] = sub i16 12, [[DOTCAST]]
; IC4VF4-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
; IC4VF4-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
; IC4VF4-NEXT: [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 0, i32 1, i32 2, i32 3>
-; IC4VF4-NEXT: [[VEC_IV8:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 4, i32 5, i32 6, i32 7>
-; IC4VF4-NEXT: [[VEC_IV11:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 8, i32 9, i32 10, i32 11>
-; IC4VF4-NEXT: [[VEC_IV14:%.*]] = add <4 x i32> [[BROADCAST_SPLAT5]], <i32 12, i32 13, i32 14, i32 15>
; IC4VF4-NEXT: [[TMP0:%.*]] = icmp ule <4 x i32> [[VEC_IV]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV8]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV11]], splat (i32 11)
-; IC4VF4-NEXT: [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV14]], splat (i32 11)
; IC4VF4-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; IC4VF4-NEXT: br i1 [[TMP4]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
; IC4VF4: [[PRED_LOAD_IF]]:
@@ -244,215 +235,50 @@ define i16 @select_decreasing_induction_icmp_table_i16(i16 noundef %val) {
; IC4VF4-NEXT: [[TMP20:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP19]], i32 2
; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE18]]
; IC4VF4: [[PRED_LOAD_CONTINUE18]]:
-; IC4VF4-NEXT: [[TMP21:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
+; IC4VF4-NEXT: [[TMP93:%.*]] = phi <4 x i16> [ [[TMP15]], %[[PRED_LOAD_CONTINUE16]] ], [ [[TMP20]], %[[PRED_LOAD_IF17]] ]
; IC4VF4-NEXT: [[TMP22:%.*]] = extractelement <4 x i1> [[TMP0]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20:.*]]
+; IC4VF4-NEXT: br i1 [[TMP22]], label %[[PRED_LOAD_IF19:.*]], label %[[PRED_LOAD_CONTINUE20]]
; IC4VF4: [[PRED_LOAD_IF19]]:
; IC4VF4-NEXT: [[TMP23:%.*]] = add i16 [[OFFSET_IDX]], -3
-; IC4VF4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP23]]
-; IC4VF4-NEXT: [[TMP25:%.*]] = load i16, ptr [[TMP24]], align 1
-; IC4VF4-NEXT: [[TMP26:%.*]] = insertelement <4 x i16> [[TMP21]], i16 [[TMP25]], i32 3
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE20]]
-; IC4VF4: [[PRED_LOAD_CONTINUE20]]:
-; IC4VF4-NEXT: [[TMP27:%.*]] = phi <4 x i16> [ [[TMP21]], %[[PRED_LOAD_CONTINUE18]] ], [ [[TMP26]], %[[PRED_LOAD_IF19]] ]
-; IC4VF4-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP28]], label %[[PRED_LOAD_IF21:.*]], label %[[PRED_LOAD_CONTINUE22:.*]]
-; IC4VF4: [[PRED_LOAD_IF21]]:
-; IC4VF4-NEXT: [[TMP29:%.*]] = add i16 [[OFFSET_IDX]], -4
-; IC4VF4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP29]]
-; IC4VF4-NEXT: [[TMP31:%.*]] = load i16, ptr [[TMP30]], align 1
-; IC4VF4-NEXT: [[TMP32:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i32 0
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE22]]
-; IC4VF4: [[PRED_LOAD_CONTINUE22]]:
-; IC4VF4-NEXT: [[TMP33:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE20]] ], [ [[TMP32]], %[[PRED_LOAD_IF21]] ]
-; IC4VF4-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP34]], label %[[PRED_LOAD_IF23:.*]], label %[[PRED_LOAD_CONTINUE24:.*]]
-; IC4VF4: [[PRED_LOAD_IF23]]:
-; IC4VF4-NEXT: [[TMP35:%.*]] = add i16 [[OFFSET_IDX]], -5
-; IC4VF4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP35]]
-; IC4VF4-NEXT: [[TMP37:%.*]] = load i16, ptr [[TMP36]], align 1
-; IC4VF4-NEXT: [[TMP38:%.*]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP37]], i32 1
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE24]]
-; IC4VF4: [[PRED_LOAD_CONTINUE24]]:
-; IC4VF4-NEXT: [[TMP39:%.*]] = phi <4 x i16> [ [[TMP33]], %[[PRED_LOAD_CONTINUE22]] ], [ [[TMP38]], %[[PRED_LOAD_IF23]] ]
-; IC4VF4-NEXT: [[TMP40:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP40]], label %[[PRED_LOAD_IF25:.*]], label %[[PRED_LOAD_CONTINUE26:.*]]
-; IC4VF4: [[PRED_LOAD_IF25]]:
-; IC4VF4-NEXT: [[TMP41:%.*]] = add i16 [[OFFSET_IDX]], -6
-; IC4VF4-NEXT: [[TMP42:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP41]]
-; IC4VF4-NEXT: [[TMP43:%.*]] = load i16, ptr [[TMP42]], align 1
-; IC4VF4-NEXT: [[TMP44:%.*]] = insertelement <4 x i16> [[TMP39]], i16 [[TMP43]], i32 2
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE26]]
-; IC4VF4: [[PRED_LOAD_CONTINUE26]]:
-; IC4VF4-NEXT: [[TMP45:%.*]] = phi <4 x i16> [ [[TMP39]], %[[PRED_LOAD_CONTINUE24]] ], [ [[TMP44]], %[[PRED_LOAD_IF25]] ]
-; IC4VF4-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP46]], label %[[PRED_LOAD_IF27:.*]], label %[[PRED_LOAD_CONTINUE28:.*]]
-; IC4VF4: [[PRED_LOAD_IF27]]:
-; IC4VF4-NEXT: [[TMP47:%.*]] = add i16 [[OFFSET_IDX]], -7
-; IC4VF4-NEXT: [[TMP48:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP47]]
-; IC4VF4-NEXT: [[TMP49:%.*]] = load i16, ptr [[TMP48]], align 1
-; IC4VF4-NEXT: [[TMP50:%.*]] = insertelement <4 x i16> [[TMP45]], i16 [[TMP49]], i32 3
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE28]]
-; IC4VF4: [[PRED_LOAD_CONTINUE28]]:
-; IC4VF4-NEXT: [[TMP51:%.*]] = phi <4 x i16> [ [[TMP45]], %[[PRED_LOAD_CONTINUE26]] ], [ [[TMP50]], %[[PRED_LOAD_IF27]] ]
-; IC4VF4-NEXT: [[TMP52:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP52]], label %[[PRED_LOAD_IF29:.*]], label %[[PRED_LOAD_CONTINUE30:.*]]
-; IC4VF4: [[PRED_LOAD_IF29]]:
-; IC4VF4-NEXT: [[TMP53:%.*]] = add i16 [[OFFSET_IDX]], -8
-; IC4VF4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP53]]
-; IC4VF4-NEXT: [[TMP55:%.*]] = load i16, ptr [[TMP54]], align 1
-; IC4VF4-NEXT: [[TMP56:%.*]] = insertelement <4 x i16> poison, i16 [[TMP55]], i32 0
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE30]]
-; IC4VF4: [[PRED_LOAD_CONTINUE30]]:
-; IC4VF4-NEXT: [[TMP57:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE28]] ], [ [[TMP56]], %[[PRED_LOAD_IF29]] ]
-; IC4VF4-NEXT: [[TMP58:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP58]], label %[[PRED_LOAD_IF31:.*]], label %[[PRED_LOAD_CONTINUE32:.*]]
-; IC4VF4: [[PRED_LOAD_IF31]]:
-; IC4VF4-NEXT: [[TMP59:%.*]] = add i16 [[OFFSET_IDX]], -9
-; IC4VF4-NEXT: [[TMP60:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP59]]
-; IC4VF4-NEXT: [[TMP61:%.*]] = load i16, ptr [[TMP60]], align 1
-; IC4VF4-NEXT: [[TMP62:%.*]] = insertelement <4 x i16> [[TMP57]], i16 [[TMP61]], i32 1
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE32]]
-; IC4VF4: [[PRED_LOAD_CONTINUE32]]:
-; IC4VF4-NEXT: [[TMP63:%.*]] = phi <4 x i16> [ [[TMP57]], %[[PRED_LOAD_CONTINUE30]] ], [ [[TMP62]], %[[PRED_LOAD_IF31]] ]
-; IC4VF4-NEXT: [[TMP64:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP64]], label %[[PRED_LOAD_IF33:.*]], label %[[PRED_LOAD_CONTINUE34:.*]]
-; IC4VF4: [[PRED_LOAD_IF33]]:
-; IC4VF4-NEXT: [[TMP65:%.*]] = add i16 [[OFFSET_IDX]], -10
-; IC4VF4-NEXT: [[TMP66:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP65]]
-; IC4VF4-NEXT: [[TMP67:%.*]] = load i16, ptr [[TMP66]], align 1
-; IC4VF4-NEXT: [[TMP68:%.*]] = insertelement <4 x i16> [[TMP63]], i16 [[TMP67]], i32 2
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE34]]
-; IC4VF4: [[PRED_LOAD_CONTINUE34]]:
-; IC4VF4-NEXT: [[TMP69:%.*]] = phi <4 x i16> [ [[TMP63]], %[[PRED_LOAD_CONTINUE32]] ], [ [[TMP68]], %[[PRED_LOAD_IF33]] ]
-; IC4VF4-NEXT: [[TMP70:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
-; IC4VF4-NEXT: br i1 [[TMP70]], label %[[PRED_LOAD_IF35:.*]], label %[[PRED_LOAD_CONTINUE36:.*]]
-; IC4VF4: [[PRED_LOAD_IF35]]:
-; IC4VF4-NEXT: [[TMP71:%.*]] = add i16 [[OFFSET_IDX]], -11
-; IC4VF4-NEXT: [[TMP72:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP71]]
-; IC4VF4-NEXT: [[TMP73:%.*]] = load i16, ptr [[TMP72]], align 1
-; IC4VF4-NEXT: [[TMP74:%.*]] = insertelement <4 x i16> [[TMP69]], i16 [[TMP73]], i32 3
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE36]]
-; IC4VF4: [[PRED_LOAD_CONTINUE36]]:
-; IC4VF4-NEXT: [[TMP75:%.*]] = phi <4 x i16> [ [[TMP69]], %[[PRED_LOAD_CONTINUE34]] ], [ [[TMP74]], %[[PRED_LOAD_IF35]] ]
-; IC4VF4-NEXT: [[TMP76:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
-; IC4VF4-NEXT: br i1 [[TMP76]], label %[[PRED_LOAD_IF37:.*]], label %[[PRED_LOAD_CONTINUE38:.*]]
-; IC4VF4: [[PRED_LOAD_IF37]]:
-; IC4VF4-NEXT: [[TMP77:%.*]] = add i16 [[OFFSET_IDX]], -12
-; IC4VF4-NEXT: [[TMP78:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP77]]
-; IC4VF4-NEXT: [[TMP79:%.*]] = load i16, ptr [[TMP78]], align 1
-; IC4VF4-NEXT: [[TMP80:%.*]] = insertelement <4 x i16> poison, i16 [[TMP79]], i32 0
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE38]]
-; IC4VF4: [[PRED_LOAD_CONTINUE38]]:
-; IC4VF4-NEXT: [[TMP81:%.*]] = phi <4 x i16> [ poison, %[[PRED_LOAD_CONTINUE36]] ], [ [[TMP80]], %[[PRED_LOAD_IF37]] ]
-; IC4VF4-NEXT: [[TMP82:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
-; IC4VF4-NEXT: br i1 [[TMP82]], label %[[PRED_LOAD_IF39:.*]], label %[[PRED_LOAD_CONTINUE40:.*]]
-; IC4VF4: [[PRED_LOAD_IF39]]:
-; IC4VF4-NEXT: [[TMP83:%.*]] = add i16 [[OFFSET_IDX]], -13
-; IC4VF4-NEXT: [[TMP84:%.*]] = getelementptr inbounds [13 x i16], ptr @table, i16 0, i16 [[TMP83]]
-; IC4VF4-NEXT: [[TMP85:%.*]] = load i16, ptr [[TMP84]], align 1
-; IC4VF4-NEXT: [[TMP86:%.*]] = insertelement <4 x i16> [[TMP81]], i16 [[TMP85]], i32 1
-; IC4VF4-NEXT: br label %[[PRED_LOAD_CONTINUE40]]
-; IC4VF4: [[PRED_LOAD_CONTINUE40]]:
-; IC4VF4-NEXT: [[TMP87:%.*]] = phi <4 x i16> [ [[TMP81]], %[[PRED_LOAD_CONTINUE38]] ], [ [[TMP86]], %[[PRED_LOAD_IF39]] ]
-; IC4VF4-NEXT: [[TMP88:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
-; IC4VF4-NEXT: br i1 [[TMP88]], label %[[PRED_LOAD_IF41:.*]], label %[[PRED_LOAD_CONTINUE42:.*]]
-; IC4VF4: [[PRED_LOAD_IF41]]:
-; IC4VF4-NEXT: [[TMP89:%.*]] = add i16 [[OFFSET_IDX]], -14
-; IC4VF4-NEXT: [[TMP90:%.*]] = getelementptr inbounds [13 x i16], ptr @t...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/184437
More information about the llvm-commits
mailing list