[llvm] [LoopVectorize] Don't discount instructions scalarized due to tail folding (PR #109289)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 19 07:20:03 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: John Brawn (john-brawn-arm)
<details>
<summary>Changes</summary>
When an instruction is scalarized due to tail folding the cost that we calculate (which is then returned by getWideningCost and thus getInstructionCost) is already the scalarized cost, so computePredInstDiscount shouldn't apply a scalarization discount to these instructions.
Fixes #<!-- -->66652
---
Patch is 87.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/109289.diff
5 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+8-4)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll (+43-302)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll (+12-304)
- (modified) llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll (+24-6)
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+182-119)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 107fb38be31969..c64f8ccb3a84f7 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5501,10 +5501,14 @@ InstructionCost LoopVectorizationCostModel::computePredInstDiscount(
// Scale the total scalar cost by block probability.
ScalarCost /= getReciprocalPredBlockProb();
- // Compute the discount. A non-negative discount means the vector version
- // of the instruction costs more, and scalarizing would be beneficial.
- Discount += VectorCost - ScalarCost;
- ScalarCosts[I] = ScalarCost;
+ // Compute the discount, unless this instruction must be scalarized due to
+ // tail folding, as then the vector cost is already the scalar cost. A
+ // non-negative discount means the vector version of the instruction costs
+ // more, and scalarizing would be beneficial.
+ if (!foldTailByMasking() || getWideningDecision(I, VF) != CM_Scalarize) {
+ Discount += VectorCost - ScalarCost;
+ ScalarCosts[I] = ScalarCost;
+ }
}
return Discount;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index 9910be7224674c..de1cae7383f863 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -529,93 +529,14 @@ define void @latch_branch_cost(ptr %dst) {
; PRED-LABEL: define void @latch_branch_cost(
; PRED-SAME: ptr [[DST:%.*]]) {
; PRED-NEXT: entry:
-; PRED-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; PRED: vector.ph:
-; PRED-NEXT: br label [[VECTOR_BODY:%.*]]
-; PRED: vector.body:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
-; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
-; PRED-NEXT: [[TMP0:%.*]] = icmp ule <8 x i64> [[VEC_IND]], <i64 99, i64 99, i64 99, i64 99, i64 99, i64 99, i64 99, i64 99>
-; PRED-NEXT: [[TMP1:%.*]] = extractelement <8 x i1> [[TMP0]], i32 0
-; PRED-NEXT: br i1 [[TMP1]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; PRED: pred.store.if:
-; PRED-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
-; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP2]]
-; PRED-NEXT: store i8 0, ptr [[TMP3]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE]]
-; PRED: pred.store.continue:
-; PRED-NEXT: [[TMP4:%.*]] = extractelement <8 x i1> [[TMP0]], i32 1
-; PRED-NEXT: br i1 [[TMP4]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
-; PRED: pred.store.if1:
-; PRED-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 1
-; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP5]]
-; PRED-NEXT: store i8 0, ptr [[TMP6]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE2]]
-; PRED: pred.store.continue2:
-; PRED-NEXT: [[TMP7:%.*]] = extractelement <8 x i1> [[TMP0]], i32 2
-; PRED-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; PRED: pred.store.if3:
-; PRED-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 2
-; PRED-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP8]]
-; PRED-NEXT: store i8 0, ptr [[TMP9]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; PRED: pred.store.continue4:
-; PRED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP0]], i32 3
-; PRED-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
-; PRED: pred.store.if5:
-; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 3
-; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
-; PRED-NEXT: store i8 0, ptr [[TMP12]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE7]]
-; PRED: pred.store.continue6:
-; PRED-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP0]], i32 4
-; PRED-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
-; PRED: pred.store.if7:
-; PRED-NEXT: [[TMP14:%.*]] = add i64 [[INDEX]], 4
-; PRED-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP14]]
-; PRED-NEXT: store i8 0, ptr [[TMP15]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE8]]
-; PRED: pred.store.continue8:
-; PRED-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP0]], i32 5
-; PRED-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
-; PRED: pred.store.if9:
-; PRED-NEXT: [[TMP17:%.*]] = add i64 [[INDEX]], 5
-; PRED-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP17]]
-; PRED-NEXT: store i8 0, ptr [[TMP18]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE10]]
-; PRED: pred.store.continue10:
-; PRED-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP0]], i32 6
-; PRED-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
-; PRED: pred.store.if11:
-; PRED-NEXT: [[TMP20:%.*]] = add i64 [[INDEX]], 6
-; PRED-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP20]]
-; PRED-NEXT: store i8 0, ptr [[TMP21]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE12]]
-; PRED: pred.store.continue12:
-; PRED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP0]], i32 7
-; PRED-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE6]]
-; PRED: pred.store.if13:
-; PRED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 7
-; PRED-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
-; PRED-NEXT: store i8 0, ptr [[TMP24]], align 1
-; PRED-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; PRED: pred.store.continue14:
-; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
-; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; PRED-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 104
-; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
-; PRED: middle.block:
-; PRED-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; PRED: scalar.ph:
-; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 104, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; PRED-NEXT: br label [[FOR_BODY:%.*]]
; PRED: loop:
-; PRED-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; PRED-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; PRED-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
; PRED-NEXT: store i8 0, ptr [[GEP]], align 1
; PRED-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[IV]], 1
; PRED-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 100
-; PRED-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; PRED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]]
; PRED: exit:
; PRED-NEXT: ret void
;
@@ -800,27 +721,27 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
; PRED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 0
-; PRED-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META6:![0-9]+]]
+; PRED-NEXT: [[TMP16:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META4:![0-9]+]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP16]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT28]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
-; PRED-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META9:![0-9]+]]
+; PRED-NEXT: [[TMP17:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META7:![0-9]+]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP17]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP18:%.*]] = or <vscale x 4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
-; PRED-NEXT: [[TMP19:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META11:![0-9]+]]
+; PRED-NEXT: [[TMP19:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META9:![0-9]+]]
; PRED-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP19]], i64 0
; PRED-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT30]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; PRED-NEXT: [[TMP20:%.*]] = icmp ugt <vscale x 4 x i32> [[BROADCAST_SPLAT31]], [[TMP18]]
; PRED-NEXT: [[TMP21:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP20]], <vscale x 4 x i1> zeroinitializer
; PRED-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[D]], i64 [[TMP15]]
-; PRED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> [[BROADCAST_SPLAT33]], i32 4, <vscale x 4 x i1> [[TMP21]]), !alias.scope [[META13:![0-9]+]], !noalias [[META15:![0-9]+]]
+; PRED-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> [[BROADCAST_SPLAT33]], i32 4, <vscale x 4 x i1> [[TMP21]]), !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
; PRED-NEXT: [[TMP23:%.*]] = getelementptr i32, ptr [[TMP22]], i32 0
-; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP23]], i32 4, <vscale x 4 x i1> [[TMP21]]), !alias.scope [[META17:![0-9]+]], !noalias [[META18:![0-9]+]]
+; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP23]], i32 4, <vscale x 4 x i1> [[TMP21]]), !alias.scope [[META15:![0-9]+]], !noalias [[META16:![0-9]+]]
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP14]])
; PRED-NEXT: [[TMP24:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer)
; PRED-NEXT: [[TMP25:%.*]] = extractelement <vscale x 4 x i1> [[TMP24]], i32 0
-; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
+; PRED-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; PRED: middle.block:
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; PRED: scalar.ph:
@@ -842,7 +763,7 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
; PRED: loop.latch:
; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; PRED-NEXT: [[C_1:%.*]] = icmp eq i64 [[IV]], [[N]]
-; PRED-NEXT: br i1 [[C_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP20:![0-9]+]]
+; PRED-NEXT: br i1 [[C_1]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP18:![0-9]+]]
; PRED: exit:
; PRED-NEXT: ret i32 0
;
@@ -959,7 +880,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], shufflevector (<vscale x 2 x i1> insertelement (<vscale x 2 x i1> poison, i1 true, i64 0), <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer)
; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0
-; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; PRED-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; PRED: middle.block:
; PRED-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
; PRED: scalar.ph:
@@ -979,7 +900,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
; PRED-NEXT: [[PTR_IV_NEXT]] = getelementptr i8, ptr [[PTR_IV]], i64 8
; PRED-NEXT: [[IV_CLAMP:%.*]] = and i64 [[IV]], 4294967294
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_CLAMP]], 512
-; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP22:![0-9]+]]
+; PRED-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP20:![0-9]+]]
; PRED: exit:
; PRED-NEXT: ret void
;
@@ -1003,208 +924,34 @@ exit:
ret void
}
-define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
-; DEFAULT-LABEL: define void @low_trip_count_fold_tail_scalarized_store(
+define void @low_trip_count_store(ptr %dst) {
+; DEFAULT-LABEL: define void @low_trip_count_store(
; DEFAULT-SAME: ptr [[DST:%.*]]) {
; DEFAULT-NEXT: entry:
-; DEFAULT-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; DEFAULT: vector.ph:
-; DEFAULT-NEXT: br label [[VECTOR_BODY:%.*]]
-; DEFAULT: vector.body:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE14:%.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE14]] ]
-; DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i64> [[VEC_IND]], <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>
-; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; DEFAULT-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
-; DEFAULT: pred.store.if:
-; DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0
-; DEFAULT-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE]]
-; DEFAULT: pred.store.continue:
-; DEFAULT-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
-; DEFAULT: pred.store.if1:
-; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
-; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
-; DEFAULT-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1
-; DEFAULT-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE2]]
-; DEFAULT: pred.store.continue2:
-; DEFAULT-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
-; DEFAULT: pred.store.if3:
-; DEFAULT-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
-; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
-; DEFAULT-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2
-; DEFAULT-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE4]]
-; DEFAULT: pred.store.continue4:
-; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
-; DEFAULT: pred.store.if5:
-; DEFAULT-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3
-; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
-; DEFAULT-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3
-; DEFAULT-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE6]]
-; DEFAULT: pred.store.continue6:
-; DEFAULT-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; DEFAULT-NEXT: br i1 [[TMP18]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
-; DEFAULT: pred.store.if7:
-; DEFAULT-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4
-; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]]
-; DEFAULT-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4
-; DEFAULT-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE8]]
-; DEFAULT: pred.store.continue8:
-; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; DEFAULT-NEXT: br i1 [[TMP22]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
-; DEFAULT: pred.store.if9:
-; DEFAULT-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5
-; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
-; DEFAULT-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5
-; DEFAULT-NEXT: store i8 [[TMP25]], ptr [[TMP24]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE10]]
-; DEFAULT: pred.store.continue10:
-; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; DEFAULT-NEXT: br i1 [[TMP26]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
-; DEFAULT: pred.store.if11:
-; DEFAULT-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6
-; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
-; DEFAULT-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6
-; DEFAULT-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE12]]
-; DEFAULT: pred.store.continue12:
-; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; DEFAULT-NEXT: br i1 [[TMP30]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14]]
-; DEFAULT: pred.store.if13:
-; DEFAULT-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7
-; DEFAULT-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]]
-; DEFAULT-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7
-; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1
-; DEFAULT-NEXT: br label [[PRED_STORE_CONTINUE14]]
-; DEFAULT: pred.store.continue14:
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
-; DEFAULT-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
-; DEFAULT: middle.block:
-; DEFAULT-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
-; DEFAULT: scalar.ph:
-; DEFAULT-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 8, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; DEFAULT-NEXT: br label [[LOOP:%.*]]
; DEFAULT: loop:
-; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; DEFAULT-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
; DEFAULT-NEXT: [[IV_TRUNC:%.*]] = trunc i64 [[IV]] to i8
; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[IV]]
; DEFAULT-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1
; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7
-; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
; DEFAULT: exit:
; DEFAULT-NEXT: ret void
;
-; PRED-LABEL: define void @low_trip_count_fold_tail_scalarized_store(
+; PRED-LABEL: define void @low_trip_count_store(
; PRED-SAME: ptr [[DST:%.*]]) {
; PRED-NEXT: entry:
-; PRED-NEXT: br i1 false, label [[SCALAR_PH...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/109289
More information about the llvm-commits
mailing list