[llvm] [VPlan] Skip uses-scalars restriction if one of ops needs broadcast. (PR #168246)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 15 14:15:43 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Update the logic in narrowToSingleScalar to allow narrowing even if not all users use scalars, if at least one of the operands already needs broadcasting.
In that case, there won't be any additional broadcasts introduced. This should allow removing the special handling for stores, which can introduce additional broadcasts currently.
---
Patch is 57.88 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168246.diff
14 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+26-25)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+12-12)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll (+251-11)
- (modified) llvm/test/Transforms/LoopVectorize/X86/cost-model.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/cse-casts.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll (+10-14)
- (modified) llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll (+3-3)
- (modified) llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll (+6-6)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll (+2-2)
- (modified) llvm/test/Transforms/LoopVectorize/trunc-shifts.ll (+9-9)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index d05c22e3aeb61..6451620186f65 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1425,32 +1425,33 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
continue;
}
- // Skip recipes that aren't single scalars or don't have only their
- // scalar results used. In the latter case, we would introduce extra
- // broadcasts.
+ // Skip recipes that aren't single scalars or when conversion to
+ // single-scalar does not introduce additional broadcasts. That is, either
+ // only the scalars of the recipe are used, or at least one of the
+ // operands would require a broadcast. In the latter case, the
+ // single-scalar may need to be broadcasted, but another broadcast is
+ // removed. scalar results used. In the latter case, we would introduce
+ // extra broadcasts.
if (!vputils::isSingleScalar(RepOrWidenR) ||
- !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
- if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
- // VPWidenStore doesn't have users, and stores are always
- // profitable to widen: hence, permitting address and mask
- // operands, and single-scalar stored values is an important leaf
- // condition. The assert must hold as we checked the RepOrWidenR
- // operand against vputils::isSingleScalar.
- assert(RepOrWidenR != Store->getStoredValue() ||
- vputils::isSingleScalar(Store->getStoredValue()));
- return true;
- }
-
- if (auto *VPI = dyn_cast<VPInstruction>(U)) {
- unsigned Opcode = VPI->getOpcode();
- if (Opcode == VPInstruction::ExtractLastElement ||
- Opcode == VPInstruction::ExtractLastLanePerPart ||
- Opcode == VPInstruction::ExtractPenultimateElement)
- return true;
- }
-
- return U->usesScalars(RepOrWidenR);
- }))
+ (!all_of(RepOrWidenR->users(),
+ [RepOrWidenR](const VPUser *U) {
+ if (auto *VPI = dyn_cast<VPInstruction>(U)) {
+ unsigned Opcode = VPI->getOpcode();
+ if (Opcode == VPInstruction::ExtractLastElement ||
+ Opcode == VPInstruction::ExtractLastLanePerPart ||
+ Opcode == VPInstruction::ExtractPenultimateElement)
+ return true;
+ }
+
+ return U->usesScalars(RepOrWidenR);
+ }) &&
+ none_of(RepOrWidenR->operands(), [RepOrWidenR](VPValue *Op) {
+ return Op->getSingleUser() == RepOrWidenR &&
+ ((Op->isLiveIn() &&
+ !isa<Constant>(Op->getLiveInIRValue())) ||
+ (isa<VPReplicateRecipe>(Op) &&
+ cast<VPReplicateRecipe>(Op)->isSingleScalar()));
+ })))
continue;
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 2f7e3568d5654..042341f27b873 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -531,10 +531,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
+; DEFAULT-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], 1
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
-; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double>
+; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[BROADCAST_SPLAT]] to <8 x double>
; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
@@ -563,10 +563,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2
-; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP12]], i64 0
+; PRED-NEXT: [[TMP11:%.*]] = or i16 [[TMP12]], 1
+; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP11]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
-; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
-; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double>
+; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[BROADCAST_SPLAT]] to <vscale x 2 x double>
; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr align 8 [[NEXT_GEP]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
@@ -672,10 +672,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
; DEFAULT-NEXT: [[TMP16:%.*]] = load float, ptr [[SRC_2]], align 4
-; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP16]], i64 0
+; DEFAULT-NEXT: [[TMP17:%.*]] = fmul float [[TMP16]], 0.000000e+00
+; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP17]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
-; DEFAULT-NEXT: [[TMP17:%.*]] = fmul <8 x float> [[BROADCAST_SPLAT]], zeroinitializer
-; DEFAULT-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[TMP17]])
+; DEFAULT-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[BROADCAST_SPLAT]])
; DEFAULT-NEXT: [[TMP19:%.*]] = load float, ptr [[SRC_3]], align 4
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP19]], i64 0
; DEFAULT-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer
@@ -857,10 +857,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer
; PRED-NEXT: [[TMP19:%.*]] = load float, ptr [[SRC_2]], align 4
-; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP19]], i64 0
+; PRED-NEXT: [[TMP20:%.*]] = fmul float [[TMP19]], 0.000000e+00
+; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP20]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer
-; PRED-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[BROADCAST_SPLAT]], zeroinitializer
-; PRED-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[TMP20]])
+; PRED-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[BROADCAST_SPLAT]])
; PRED-NEXT: [[TMP22:%.*]] = load float, ptr [[SRC_3]], align 4
; PRED-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP22]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
index 44ae1757ce6e6..f2c0ca30a6c18 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll
@@ -59,8 +59,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]]
; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
-; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0
-; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = add i64 [[Y]], 1
@@ -68,7 +66,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; VSCALEFORTUNING2-NEXT: [[TMP9:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i32> [[TMP9]], [[TMP10]]
-; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]]
+; VSCALEFORTUNING2-NEXT: [[TMP16:%.*]] = or i32 [[Z]], [[X]]
+; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP16]], i64 0
+; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VSCALEFORTUNING2-NEXT: [[TMP13:%.*]] = and <vscale x 4 x i32> [[TMP12]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor <vscale x 4 x i32> [[TMP13]], splat (i32 1)
; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext <vscale x 4 x i32> [[TMP14]] to <vscale x 4 x i64>
@@ -180,8 +180,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4
-; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[Z]], i64 0
-; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[X]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
@@ -195,7 +193,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2)
; PRED-NEXT: [[TMP13:%.*]] = lshr <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; PRED-NEXT: [[TMP14:%.*]] = shl <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
; PRED-NEXT: [[TMP15:%.*]] = or <vscale x 4 x i32> [[TMP13]], [[TMP14]]
-; PRED-NEXT: [[TMP16:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]]
+; PRED-NEXT: [[TMP20:%.*]] = or i32 [[Z]], [[X]]
+; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP20]], i64 0
+; PRED-NEXT: [[TMP16:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP17:%.*]] = and <vscale x 4 x i32> [[TMP16]], splat (i32 1)
; PRED-NEXT: [[TMP18:%.*]] = xor <vscale x 4 x i32> [[TMP17]], splat (i32 1)
; PRED-NEXT: [[TMP19:%.*]] = zext <vscale x 4 x i32> [[TMP18]] to <vscale x 4 x i64>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index 8d4d282a5236d..0723f16677090 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -9,10 +9,10 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK: vector.ph:
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8> poison, i8 [[B]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[A]], 48
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP0]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = shl <vscale x 2 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
-; CHECK-NEXT: [[TMP6:%.*]] = ashr <vscale x 2 x i64> [[TMP5]], splat (i64 52)
+; CHECK-NEXT: [[TMP6:%.*]] = ashr <vscale x 2 x i64> [[BROADCAST_SPLAT2]], splat (i64 52)
; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 2 x i64> [[TMP6]] to <vscale x 2 x i32>
; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 2 x i8> [[BROADCAST_SPLAT]] to <vscale x 2 x i32>
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[P]], i64 0
diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
index 6ec010cdcc248..651e2ad5e74da 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll
@@ -686,16 +686,256 @@ exit:
; Test for https://github.com/llvm/llvm-project/issues/129236.
define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) {
; CHECK-LABEL: @cost_ashr_with_op_known_invariant_via_scev(
-; CHECK-NEXT: entry:
+; CHECK-NEXT: iter.check:
; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i16 0, 0
; CHECK-NEXT: [[CONV_I:%.*]] = sext i16 0 to i32
; CHECK-NEXT: [[CONV5_I:%.*]] = sext i8 [[A:%.*]] to i32
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK: vector.main.loop.iter.check:
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i1> poison, i1 [[CMP_I]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i1> [[BROADCAST_SPLATINSERT]], <32 x i1> poison, <32 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP60:%.*]] = xor <32 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
; CHECK-NEXT: br label [[LOOP_HEADER:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE62:%.*]] ]
+; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP60]], i32 0
+; CHECK-NEXT: br i1 [[TMP61]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
+; CHECK: pred.urem.if:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE]]
+; CHECK: pred.urem.continue:
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x i1> [[TMP60]], i32 1
+; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_UREM_IF1:%.*]], label [[PRED_UREM_CONTINUE2:%.*]]
+; CHECK: pred.urem.if1:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE2]]
+; CHECK: pred.urem.continue2:
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i1> [[TMP60]], i32 2
+; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4:%.*]]
+; CHECK: pred.urem.if3:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE4]]
+; CHECK: pred.urem.continue4:
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i1> [[TMP60]], i32 3
+; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF5:%.*]], label [[PRED_UREM_CONTINUE6:%.*]]
+; CHECK: pred.urem.if5:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE6]]
+; CHECK: pred.urem.continue6:
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i1> [[TMP60]], i32 4
+; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]]
+; CHECK: pred.urem.if7:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE8]]
+; CHECK: pred.urem.continue8:
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <32 x i1> [[TMP60]], i32 5
+; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]]
+; CHECK: pred.urem.if9:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE10]]
+; CHECK: pred.urem.continue10:
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i1> [[TMP60]], i32 6
+; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]]
+; CHECK: pred.urem.if11:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE12]]
+; CHECK: pred.urem.continue12:
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i1> [[TMP60]], i32 7
+; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]]
+; CHECK: pred.urem.if13:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE14]]
+; CHECK: pred.urem.continue14:
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <32 x i1> [[TMP60]], i32 8
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16:%.*]]
+; CHECK: pred.urem.if15:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE16]]
+; CHECK: pred.urem.continue16:
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i1> [[TMP60]], i32 9
+; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UREM_IF17:%.*]], label [[PRED_UREM_CONTINUE18:%.*]]
+; CHECK: pred.urem.if17:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE18]]
+; CHECK: pred.urem.continue18:
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i1> [[TMP60]], i32 10
+; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF19:%.*]], label [[PRED_UREM_CONTINUE20:%.*]]
+; CHECK: pred.urem.if19:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE20]]
+; CHECK: pred.urem.continue20:
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i1> [[TMP60]], i32 11
+; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UREM_IF21:%.*]], label [[PRED_UREM_CONTINUE22:%.*]]
+; CHECK: pred.urem.if21:
+; CHECK-NEXT: br label [[PRED_UREM_CONTINUE22]]
+; CHECK: pred.urem.con...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/168246
More information about the llvm-commits
mailing list