[llvm] bb937e2 - [LV] Compute value of escaped induction based on the computed end value. (#110576)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 10 12:04:50 PDT 2024
Author: Florian Hahn
Date: 2024-10-10T20:04:46+01:00
New Revision: bb937e276da11c6d85318b32006f6510877c1a2c
URL: https://github.com/llvm/llvm-project/commit/bb937e276da11c6d85318b32006f6510877c1a2c
DIFF: https://github.com/llvm/llvm-project/commit/bb937e276da11c6d85318b32006f6510877c1a2c.diff
LOG: [LV] Compute value of escaped induction based on the computed end value. (#110576)
Update fixupIVUsers to compute the value for escaped inductions using
the already computed end value of the induction (EndValue), but
subtracting the step.
This results in slightly simpler codegen, as we avoid computing the full
transformed index at VectorTripCount - 1.
PR: https://github.com/llvm/llvm-project/pull/110576
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index db650b23e271e2..f2bee2c67a2353 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -2747,17 +2747,24 @@ void InnerLoopVectorizer::fixupIVUsers(PHINode *OrigPhi,
if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp()))
B.setFastMathFlags(II.getInductionBinOp()->getFastMathFlags());
- Value *CountMinusOne = B.CreateSub(
- VectorTripCount, ConstantInt::get(VectorTripCount->getType(), 1));
- CountMinusOne->setName("cmo");
-
VPValue *StepVPV = Plan.getSCEVExpansion(II.getStep());
assert(StepVPV && "step must have been expanded during VPlan execution");
Value *Step = StepVPV->isLiveIn() ? StepVPV->getLiveInIRValue()
: State.get(StepVPV, VPLane(0));
- Value *Escape =
- emitTransformedIndex(B, CountMinusOne, II.getStartValue(), Step,
- II.getKind(), II.getInductionBinOp());
+ Value *Escape = nullptr;
+ if (EndValue->getType()->isIntegerTy())
+ Escape = B.CreateSub(EndValue, Step);
+ else if (EndValue->getType()->isPointerTy())
+ Escape = B.CreatePtrAdd(EndValue, B.CreateNeg(Step));
+ else if (EndValue->getType()->isFloatingPointTy()) {
+ Escape = B.CreateBinOp(II.getInductionBinOp()->getOpcode() ==
+ Instruction::FAdd
+ ? Instruction::FSub
+ : Instruction::FAdd,
+ EndValue, Step);
+ } else {
+ llvm_unreachable("all possible induction types must be handled");
+ }
Escape->setName("ind.escape");
MissingVals[UI] = Escape;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
index c28776e82776b7..64b69be5f52598 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-live-out-pointer-induction.ll
@@ -42,9 +42,7 @@ define ptr @test(ptr %start.1, ptr %start.2, ptr %end) {
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
-; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
-; CHECK-NEXT: [[TMP37:%.*]] = mul i64 [[CMO]], 8
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[START_1]], i64 [[TMP37]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = getelementptr i8, ptr [[IND_END]], i64 -8
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[START_1]], [[ENTRY:%.*]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
index cb4f5f6d9eabaf..54dd9c870a1709 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll
@@ -208,25 +208,23 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[VEC_IND:%.*]] = phi <4 x double> [ <double 0.000000e+00, double 3.000000e+00, double 6.000000e+00, double 9.000000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AUTO_VEC-NEXT: [[STEP_ADD:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 1.200000e+01, double 1.200000e+01, double 1.200000e+01, double 1.200000e+01>
-; AUTO_VEC-NEXT: [[STEP_ADD2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
-; AUTO_VEC-NEXT: [[STEP_ADD3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
+; AUTO_VEC-NEXT: [[STEP_ADD_2:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 2.400000e+01, double 2.400000e+01, double 2.400000e+01, double 2.400000e+01>
+; AUTO_VEC-NEXT: [[STEP_ADD_3:%.*]] = fadd fast <4 x double> [[VEC_IND]], <double 3.600000e+01, double 3.600000e+01, double 3.600000e+01, double 3.600000e+01>
; AUTO_VEC-NEXT: [[TMP1:%.*]] = getelementptr double, ptr [[A:%.*]], i64 [[INDEX]]
; AUTO_VEC-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[TMP1]], i64 32
; AUTO_VEC-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[TMP1]], i64 64
; AUTO_VEC-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[TMP1]], i64 96
; AUTO_VEC-NEXT: store <4 x double> [[VEC_IND]], ptr [[TMP1]], align 8
; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD]], ptr [[TMP2]], align 8
-; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD2]], ptr [[TMP3]], align 8
-; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD3]], ptr [[TMP4]], align 8
+; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_2]], ptr [[TMP3]], align 8
+; AUTO_VEC-NEXT: store <4 x double> [[STEP_ADD_3]], ptr [[TMP4]], align 8
; AUTO_VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
; AUTO_VEC-NEXT: [[VEC_IND_NEXT]] = fadd fast <4 x double> [[VEC_IND]], <double 4.800000e+01, double 4.800000e+01, double 4.800000e+01, double 4.800000e+01>
; AUTO_VEC-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; AUTO_VEC-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; AUTO_VEC: middle.block:
; AUTO_VEC-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
-; AUTO_VEC-NEXT: [[CMO:%.*]] = add nsw i64 [[N_VEC]], -1
-; AUTO_VEC-NEXT: [[DOTCAST6:%.*]] = sitofp i64 [[CMO]] to double
-; AUTO_VEC-NEXT: [[TMP6:%.*]] = fmul fast double [[DOTCAST6]], 3.000000e+00
+; AUTO_VEC-NEXT: [[IND_ESCAPE:%.*]] = fadd fast double [[TMP0]], -3.000000e+00
; AUTO_VEC-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; AUTO_VEC: for.body:
; AUTO_VEC-NEXT: [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
@@ -238,7 +236,7 @@ define double @external_use_with_fast_math(ptr %a, i64 %n) {
; AUTO_VEC-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[I_NEXT]], [[SMAX]]
; AUTO_VEC-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; AUTO_VEC: for.end:
-; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[TMP6]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ]
+; AUTO_VEC-NEXT: [[J_LCSSA:%.*]] = phi double [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ], [ [[J]], [[FOR_BODY]] ]
; AUTO_VEC-NEXT: ret double [[J_LCSSA]]
;
entry:
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index bf27c146ec9ce1..02fdbc05ed5188 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -63,7 +63,7 @@ for.end:
; CHECK-LABEL: @geppre
; CHECK-LABEL: middle.block:
-; CHECK: %ind.escape = getelementptr i8, ptr %ptr, i64 496
+; CHECK: %ind.escape = getelementptr i8, ptr %ind.end, i64 -16
; CHECK-LABEL: for.end:
; CHECK: %[[RET:.*]] = phi ptr [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
; CHECK: ret ptr %[[RET]]
@@ -85,9 +85,7 @@ for.end:
; CHECK-LABEL: @both
; CHECK-LABEL: middle.block:
-; CHECK: %[[END:.*]] = sub i64 %n.vec, 1
-; CHECK: %[[END_OFFSET:.*]] = mul i64 %[[END]], 4
-; CHECK: %ind.escape = getelementptr i8, ptr %base, i64 %[[END_OFFSET]]
+; CHECK: %ind.escape = getelementptr i8, ptr %ind.end1, i64 -4
; CHECK-LABEL: for.end:
; CHECK: %[[RET:.*]] = phi ptr [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ]
; CHECK: ret ptr %[[RET]]
@@ -142,7 +140,7 @@ for.end:
; CHECK: %[[N_VEC:.+]] = sub i32 %[[T5]], %[[N_MOD_VF]]
; CHECK: middle.block
; CHECK: %[[CMP:.+]] = icmp eq i32 %[[T5]], %[[N_VEC]]
-; CHECK: %ind.escape = add i32 %[[T15]],
+; CHECK: %ind.escape = sub i32 %ind.end8, -8
; CHECK: br i1 %[[CMP]], label %BB3, label %scalar.ph
define void @PR30742() {
BB0:
diff --git a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
index 80a6bb50ca91b6..d462d3aa650d28 100644
--- a/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
+++ b/llvm/test/Transforms/LoopVectorize/no-fold-tail-by-masking-iv-external-uses.ll
@@ -51,8 +51,7 @@ define i32 @test(ptr %arr, i64 %n) {
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
-; CHECK-NEXT: [[CMO:%.*]] = sub i64 [[N_VEC]], 1
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = add i64 1, [[CMO]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i64 [[IND_END]], 1
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOAD_VAL:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[PREHEADER]] ], [ 1, [[VECTOR_SCEVCHECK]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
index c0eb4ccdd6d7e5..af1c146c2c6c4c 100644
--- a/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr58811-scev-expansion.ll
@@ -28,10 +28,10 @@ define void @test1_pr58811() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
+; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: br i1 false, label [[LOOP_3_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_2_PREHEADER]] ]
@@ -123,28 +123,28 @@ define void @test2_pr58811() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
-; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
+; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[INDUCTION_IV_LCSSA]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[INDUCTION_IV_LCSSA]]
; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
; CHECK-NEXT: br label [[LOOP_3:%.*]]
; CHECK: loop.3:
-; CHECK-NEXT: [[INT16_TINDARRAYSAFEVAR_186_0747_1:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[UINT32_TVAR_177_2745_1:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[SUB93_1]] = sub i32 [[UINT32_TVAR_177_2745_1]], [[IV_2_LCSSA]]
-; CHECK-NEXT: [[INC_1]] = add i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 1
-; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[INT16_TINDARRAYSAFEVAR_186_0747_1]], 198
+; CHECK-NEXT: [[IV_4:%.*]] = phi i16 [ [[INC_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[IV_5:%.*]] = phi i32 [ [[SUB93_1:%.*]], [[LOOP_3]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
+; CHECK-NEXT: [[SUB93_1]] = sub i32 [[IV_5]], [[IV_2_LCSSA]]
+; CHECK-NEXT: [[INC_1]] = add i16 [[IV_4]], 1
+; CHECK-NEXT: [[CMP88_1:%.*]] = icmp ult i16 [[IV_4]], 198
; CHECK-NEXT: br i1 [[CMP88_1]], label [[LOOP_3]], label [[LOOP_4_PREHEADER]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: loop.4.preheader:
-; CHECK-NEXT: [[UINT32_TVAR_177_2745_1_LCSSA:%.*]] = phi i32 [ [[UINT32_TVAR_177_2745_1]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[IV_5_LCSSA:%.*]] = phi i32 [ [[IV_5]], [[LOOP_3]] ], [ [[IND_ESCAPE]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP_4]]
; CHECK: loop.4:
-; CHECK-NEXT: [[UINT32_TVAR_177_2745_2:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ]
-; CHECK-NEXT: [[SUB93_2]] = sub i32 [[UINT32_TVAR_177_2745_2]], [[UINT32_TVAR_177_2745_1_LCSSA]]
+; CHECK-NEXT: [[IV_6:%.*]] = phi i32 [ [[SUB93_2]], [[LOOP_4]] ], [ 0, [[LOOP_4_PREHEADER]] ]
+; CHECK-NEXT: [[SUB93_2]] = sub i32 [[IV_6]], [[IV_5_LCSSA]]
; CHECK-NEXT: br i1 false, label [[LOOP_4]], label [[LOOP_1_HEADER_LOOPEXIT]]
;
entry:
@@ -201,10 +201,10 @@ define void @test3_pr58811() {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
-; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 196
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[IND_ESCAPE:%.*]] = mul i32 195, [[TMP3]]
+; CHECK-NEXT: [[IND_ESCAPE:%.*]] = sub i32 [[IND_END]], [[TMP3]]
; CHECK-NEXT: br i1 false, label [[LOOP_4_PREHEADER:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i16 [ 196, [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_3_PREHEADER]] ]
More information about the llvm-commits
mailing list