[llvm] [LSR] Change the computing method of Cost.ImmCost and rewrite equality icmp (PR #96044)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 19 02:05:29 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Zhijin Zeng (zengdage)
<details>
<summary>Changes</summary>
1. Don't change `x == y` to `x-y != 0` if its SCEV has already been recorded. Doing so would not reduce register requirements and may affect lsr-term-fold.
2. Change how `Cost.ImmCost` is computed: use `getIntImmCost`, which depends on the target, instead of `getSignificantBits`. For example, with `getSignificantBits` the total ImmCost of `imm(-3) + imm(-1) + imm(-2)` is 6, while the total ImmCost of `imm(3) + imm(1) + imm(2)` is 8 on RISC-V. But these should actually have the same `Cost.ImmCost`, and the difference has a negative effect on finding the best formula solution in `LSRInstance::Solve`.
---
Full diff: https://github.com/llvm/llvm-project/pull/96044.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+11-3)
- (modified) llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll (+6-6)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll (+104)
- (modified) llvm/test/Transforms/LoopStrengthReduce/pr27056.ll (+8-11)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 3a98e257367b2..96ce3895cdec7 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1414,8 +1414,12 @@ void Cost::RateFormula(const Formula &F,
if (F.BaseGV)
C.ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
- else if (Offset != 0)
- C.ImmCost += APInt(64, Offset, true).getSignificantBits();
+ else if (Offset != 0) {
+ InstructionCost Cost = TTI->getIntImmCost(
+ APInt(64, Offset, true), Type::getInt64Ty(F.getType()->getContext()),
+ TTI::TCK_CodeSize);
+ C.ImmCost += *Cost.getValue();
+ }
// Check with target if this offset with this instruction is
// specifically not supported.
@@ -3336,6 +3340,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// For calculating baseline cost
SmallPtrSet<const SCEV *, 16> Regs;
DenseSet<const SCEV *> VisitedRegs;
+ DenseSet<const SCEV *> RepeatedSCEV;
DenseSet<size_t> VisitedLSRUse;
for (const IVStrideUse &U : IU) {
@@ -3372,7 +3377,9 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// in PowerPC, no need to generate initial formulae for it.
if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
continue;
- if (CI->isEquality()) {
+ // RepeatedSCEV.count - It's been calculated once, so could not reduce the
+ // register requirements.
+ if (CI->isEquality() && !RepeatedSCEV.count(S)) {
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
@@ -3455,6 +3462,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
InsertInitialFormula(S, LU, LUIdx);
CountRegisters(LU.Formulae.back(), LUIdx);
}
+ RepeatedSCEV.insert(S);
}
LLVM_DEBUG(print_fixups(dbgs()));
diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
index 31ca8eab33508..54875b54371d8 100644
--- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
+++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll
@@ -59,18 +59,18 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) {
; CHECK-NEXT: .LBB1_3: # %for.body.preheader.new
; CHECK-NEXT: li a3, 0
; CHECK-NEXT: andi a1, a1, -2
-; CHECK-NEXT: addi a4, a0, 4
+; CHECK-NEXT: mv a4, a0
; CHECK-NEXT: .LBB1_4: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: lw a5, -4(a4)
-; CHECK-NEXT: lw a6, 0(a4)
+; CHECK-NEXT: lw a5, 0(a4)
+; CHECK-NEXT: lw a6, 4(a4)
; CHECK-NEXT: addi a5, a5, 4
-; CHECK-NEXT: sw a5, -4(a4)
+; CHECK-NEXT: sw a5, 0(a4)
; CHECK-NEXT: addi a6, a6, 4
-; CHECK-NEXT: sw a6, 0(a4)
+; CHECK-NEXT: sw a6, 4(a4)
; CHECK-NEXT: addi a3, a3, 2
; CHECK-NEXT: addi a4, a4, 8
-; CHECK-NEXT: bne a1, a3, .LBB1_4
+; CHECK-NEXT: bne a3, a1, .LBB1_4
; CHECK-NEXT: .LBB1_5: # %for.cond.cleanup.loopexit.unr-lcssa
; CHECK-NEXT: beqz a2, .LBB1_7
; CHECK-NEXT: # %bb.6: # %for.body.epil
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
index a8446c5103176..205baa052c6d3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
@@ -381,4 +381,108 @@ t4: ; preds = %1
ret void
}
+define ptr @loop_repeated_scev_icmp_zero(ptr %first, ptr %end, i8 %data, i64 %num) {
+; CHECK-LABEL: @loop_repeated_scev_icmp_zero(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = getelementptr i8, ptr [[FIRST:%.*]], i64 [[NUM:%.*]]
+; CHECK-NEXT: br label [[T1:%.*]]
+; CHECK: t1:
+; CHECK-NEXT: [[T2:%.*]] = phi ptr [ [[T18:%.*]], [[T17:%.*]] ], [ [[FIRST]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[T3:%.*]] = load i8, ptr [[T2]], align 1
+; CHECK-NEXT: [[T4:%.*]] = icmp eq i8 [[T3]], [[DATA:%.*]]
+; CHECK-NEXT: br i1 [[T4]], label [[T20:%.*]], label [[T5:%.*]]
+; CHECK: t5:
+; CHECK-NEXT: [[T6:%.*]] = getelementptr inbounds i8, ptr [[T2]], i64 1
+; CHECK-NEXT: [[T7:%.*]] = load i8, ptr [[T6]], align 1
+; CHECK-NEXT: [[T8:%.*]] = icmp eq i8 [[T7]], [[DATA]]
+; CHECK-NEXT: br i1 [[T8]], label [[T21:%.*]], label [[T9:%.*]]
+; CHECK: t9:
+; CHECK-NEXT: [[T10:%.*]] = getelementptr inbounds i8, ptr [[T2]], i64 2
+; CHECK-NEXT: [[T11:%.*]] = load i8, ptr [[T10]], align 1
+; CHECK-NEXT: [[T12:%.*]] = icmp eq i8 [[T11]], [[DATA]]
+; CHECK-NEXT: br i1 [[T12]], label [[T23:%.*]], label [[T13:%.*]]
+; CHECK: t13:
+; CHECK-NEXT: [[T14:%.*]] = getelementptr inbounds i8, ptr [[T2]], i64 3
+; CHECK-NEXT: [[T15:%.*]] = load i8, ptr [[T14]], align 1
+; CHECK-NEXT: [[T16:%.*]] = icmp eq i8 [[T15]], [[DATA]]
+; CHECK-NEXT: br i1 [[T16]], label [[T25:%.*]], label [[T17]]
+; CHECK: t17:
+; CHECK-NEXT: [[T18]] = getelementptr inbounds i8, ptr [[T2]], i64 4
+; CHECK-NEXT: [[T19:%.*]] = icmp eq ptr [[T18]], [[CMP]]
+; CHECK-NEXT: br i1 [[T19]], label [[T20]], label [[T1]]
+; CHECK: t20:
+; CHECK-NEXT: [[T2_LCSSA:%.*]] = phi ptr [ [[T2]], [[T17]] ], [ [[T2]], [[T1]] ]
+; CHECK-NEXT: br label [[T27:%.*]]
+; CHECK: t21:
+; CHECK-NEXT: [[T2_LCSSA1:%.*]] = phi ptr [ [[T2]], [[T5]] ]
+; CHECK-NEXT: [[T22:%.*]] = getelementptr inbounds i8, ptr [[T2_LCSSA1]], i64 1
+; CHECK-NEXT: [[SCEVGEP5:%.*]] = getelementptr i8, ptr [[T2]], i64 1
+; CHECK-NEXT: br label [[T27]]
+; CHECK: t23:
+; CHECK-NEXT: [[T2_LCSSA2:%.*]] = phi ptr [ [[T2]], [[T9]] ]
+; CHECK-NEXT: [[T24:%.*]] = getelementptr inbounds i8, ptr [[T2_LCSSA2]], i64 2
+; CHECK-NEXT: [[SCEVGEP4:%.*]] = getelementptr i8, ptr [[T2]], i64 2
+; CHECK-NEXT: br label [[T27]]
+; CHECK: t25:
+; CHECK-NEXT: [[T2_LCSSA3:%.*]] = phi ptr [ [[T2]], [[T13]] ]
+; CHECK-NEXT: [[T26:%.*]] = getelementptr inbounds i8, ptr [[T2_LCSSA3]], i64 3
+; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[T2]], i64 3
+; CHECK-NEXT: br label [[T27]]
+; CHECK: t27:
+; CHECK-NEXT: [[T28:%.*]] = phi ptr [ [[T2_LCSSA]], [[T20]] ], [ [[SCEVGEP5]], [[T21]] ], [ [[SCEVGEP4]], [[T23]] ], [ [[SCEVGEP]], [[T25]] ]
+; CHECK-NEXT: ret ptr [[T28]]
+;
+entry:
+ %cmp = getelementptr i8, ptr %first, i64 %num
+ br label %t1
+
+t1:
+ %t2 = phi ptr [ %t18, %t17 ], [ %first, %entry ]
+ %t3 = load i8, ptr %t2, align 1
+ %t4 = icmp eq i8 %t3, %data
+ br i1 %t4, label %t20, label %t5
+
+t5: ; preds = %t1
+ %t6 = getelementptr inbounds i8, ptr %t2, i64 1
+ %t7 = load i8, ptr %t6, align 1
+ %t8 = icmp eq i8 %t7, %data
+ br i1 %t8, label %t21, label %t9
+
+t9: ; preds = %t5
+ %t10 = getelementptr inbounds i8, ptr %t2, i64 2
+ %t11 = load i8, ptr %t10, align 1
+ %t12 = icmp eq i8 %t11, %data
+ br i1 %t12, label %t23, label %t13
+
+t13: ; preds = %t9
+ %t14 = getelementptr inbounds i8, ptr %t2, i64 3
+ %t15 = load i8, ptr %t14, align 1
+ %t16 = icmp eq i8 %t15, %data
+ br i1 %t16, label %t25, label %t17
+
+t17: ; preds = %t13
+ %t18 = getelementptr inbounds i8, ptr %t2, i64 4
+ %t19 = icmp eq ptr %t18, %cmp
+ br i1 %t19, label %t20, label %t1
+
+t20:
+ br label %t27
+
+t21:
+ %t22 = getelementptr inbounds i8, ptr %t2, i64 1
+ br label %t27
+
+t23:
+ %t24 = getelementptr inbounds i8, ptr %t2, i64 2
+ br label %t27
+
+t25:
+ %t26 = getelementptr inbounds i8, ptr %t2, i64 3
+ br label %t27
+
+t27:
+ %t28 = phi ptr [ %t2, %t20 ], [ %t6, %t21 ], [ %t10, %t23 ], [ %t14, %t25 ]
+ ret ptr %t28
+}
+
declare i64 @llvm.vscale.i64()
diff --git a/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll b/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll
index 5f082dae7cf7b..ff0c72718350b 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/pr27056.ll
@@ -16,30 +16,27 @@ define void @b_copy_ctor() personality ptr @__CxxFrameHandler3 {
; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr @GV1, align 8
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[CALL_I_NOEXC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[LSR_IV2:%.*]] = inttoptr i64 [[LSR_IV]] to ptr
+; CHECK-NEXT: [[D_0:%.*]] = phi ptr [ [[TMP0]], [[ENTRY:%.*]] ], [ [[INCDEC_PTR:%.*]], [[CALL_I_NOEXC:%.*]] ]
; CHECK-NEXT: invoke void @a_copy_ctor()
-; CHECK-NEXT: to label [[CALL_I_NOEXC]] unwind label [[CATCH_DISPATCH:%.*]]
+; CHECK-NEXT: to label [[CALL_I_NOEXC]] unwind label [[CATCH_DISPATCH:%.*]]
; CHECK: call.i.noexc:
-; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -16
+; CHECK-NEXT: [[INCDEC_PTR]] = getelementptr inbounds [[STRUCT_L:%.*]], ptr [[D_0]], i64 1
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: catch.dispatch:
-; CHECK-NEXT: [[TMP2:%.*]] = catchswitch within none [label %catch] unwind to caller
+; CHECK-NEXT: [[TMP1:%.*]] = catchswitch within none [label %catch] unwind to caller
; CHECK: catch:
-; CHECK-NEXT: [[TMP3:%.*]] = catchpad within [[TMP2]] [ptr null, i32 64, ptr null]
-; CHECK-NEXT: [[CMP16:%.*]] = icmp eq ptr [[LSR_IV2]], null
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[LSR_IV]], -1
-; CHECK-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP4]]
+; CHECK-NEXT: [[TMP2:%.*]] = catchpad within [[TMP1]] [ptr null, i32 64, ptr null]
+; CHECK-NEXT: [[CMP16:%.*]] = icmp eq ptr [[TMP0]], [[D_0]]
; CHECK-NEXT: br i1 [[CMP16]], label [[FOR_END:%.*]], label [[FOR_BODY_PREHEADER:%.*]]
; CHECK: for.body.preheader:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[UGLYGEP]], @GV2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq ptr [[D_0]], @GV2
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_BODY]]
; CHECK: for.end.loopexit:
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
-; CHECK-NEXT: catchret from [[TMP3]] to label [[TRY_CONT:%.*]]
+; CHECK-NEXT: catchret from [[TMP2]] to label [[TRY_CONT:%.*]]
; CHECK: try.cont:
; CHECK-NEXT: ret void
;
``````````
</details>
https://github.com/llvm/llvm-project/pull/96044
More information about the llvm-commits
mailing list