[llvm] [LoopUnroll] Consider simplified operands while retrieving TTI instruction cost (PR #70929)

Sergey Kachkov via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 1 05:32:31 PDT 2023


https://github.com/skachkov-sc created https://github.com/llvm/llvm-project/pull/70929

Motivating example: https://godbolt.org/z/WcM6x1YPx
Here clang does not unroll the loop at -Os, even though unrolling would produce smaller and faster code. The issue is that we estimate the cost of each GEP as 1 after unrolling:
```
Loop Unroll: F[bar] Loop %for.body.i
  Loop Size = 5
Starting LoopUnroll profitability analysis...
 Analyzing iteration 0
Adding cost of instruction (iteration 0):   store i32 %x, ptr %arrayidx.i, align 4, !tbaa !7
Adding cost of instruction (iteration 0):   %arrayidx.i = getelementptr inbounds i32, ptr @array, i64 %indvars.iv.i
 Analyzing iteration 1
Adding cost of instruction (iteration 1):   store i32 %x, ptr %arrayidx.i, align 4, !tbaa !7
Adding cost of instruction (iteration 1):   %arrayidx.i = getelementptr inbounds i32, ptr @array, i64 %indvars.iv.i
 Analyzing iteration 2
Adding cost of instruction (iteration 2):   store i32 %x, ptr %arrayidx.i, align 4, !tbaa !7
Adding cost of instruction (iteration 2):   %arrayidx.i = getelementptr inbounds i32, ptr @array, i64 %indvars.iv.i
 Analyzing iteration 3
Adding cost of instruction (iteration 3):   store i32 %x, ptr %arrayidx.i, align 4, !tbaa !7
Adding cost of instruction (iteration 3):   %arrayidx.i = getelementptr inbounds i32, ptr @array, i64 %indvars.iv.i
  Exceeded threshold.. exiting.
  UnrolledCost: 8, MaxUnrolledLoopSize: 6
  will not try to unroll partially because -unroll-allow-partial not given
```
However, a more precise cost estimate is zero: after unrolling, the GEP index is no longer the non-constant %indvars.iv.i but a known compile-time constant (one of {0, 1, 2, 3}), and such addressing can be folded on the given target architecture (RISC-V). My suggestion is to explicitly pass the expected operands to TargetTransformInfo::getInstructionCost using the SimplifiedValues map (e.g. for the first iteration the mapping is i64 %indvars.iv.i -> i64 0).


>From 1b6cdeffa9c863f3224dc1ef1ff561a94ad20a28 Mon Sep 17 00:00:00 2001
From: Sergey Kachkov <sergey.kachkov at syntacore.com>
Date: Mon, 30 Oct 2023 12:28:14 +0300
Subject: [PATCH 1/2] [LoopUnroll][NFC] Add pre-commit test

---
 .../Transforms/LoopUnroll/RISCV/unroll-Os.ll  | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll

diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll b/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll
new file mode 100644
index 000000000000000..d94cba659051f17
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
+; RUN: opt < %s -S -mtriple=riscv64 -passes=loop-unroll | FileCheck %s
+
+; Function Attrs: optsize
+define void @foo(ptr %array, i32 %x) #0 {
+; CHECK-LABEL: define void @foo
+; CHECK-SAME: (ptr [[ARRAY:%.*]], i32 [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i32 [[X]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %array, i64 %indvars.iv
+  store i32 %x, ptr %arrayidx, align 4
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 4
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
+attributes #0 = { optsize }

>From f89182079d9640ba1a87b37461d2a79cabe2ff1d Mon Sep 17 00:00:00 2001
From: Sergey Kachkov <sergey.kachkov at syntacore.com>
Date: Fri, 27 Oct 2023 19:35:20 +0300
Subject: [PATCH 2/2] [LoopUnroll] Consider simplified operands while
 retrieving TTI instruction cost

Get a more precise instruction cost after LoopUnroll by taking into
account that some of the instruction's operands can be simplified, e.g.
the induction variable is replaced by a constant after full unrolling.
---
 llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp      | 10 +++++++++-
 llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll | 14 +++++++-------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 446aa497026d3fb..470bc3038669d83 100644
--- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -443,7 +443,15 @@ static std::optional<EstimatedUnrollCost> analyzeLoopUnrollCost(
 
         // First accumulate the cost of this instruction.
         if (!Cost.IsFree) {
-          UnrolledCost += TTI.getInstructionCost(I, CostKind);
+          // Consider simplified operands in instruction cost.
+          SmallVector<Value *, 4> Operands;
+          transform(I->operands(), std::back_inserter(Operands),
+                    [&](Value *Op) {
+                      if (auto Res = SimplifiedValues.lookup(Op))
+                        return Res;
+                      return Op;
+                    });
+          UnrolledCost += TTI.getInstructionCost(I, Operands, CostKind);
           LLVM_DEBUG(dbgs() << "Adding cost of instruction (iteration "
                             << Iteration << "): ");
           LLVM_DEBUG(I->dump());
diff --git a/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll b/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll
index d94cba659051f17..26de40bf1dc13e4 100644
--- a/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll
+++ b/llvm/test/Transforms/LoopUnroll/RISCV/unroll-Os.ll
@@ -8,13 +8,13 @@ define void @foo(ptr %array, i32 %x) #0 {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 [[INDVARS_IV]]
-; CHECK-NEXT:    store i32 [[X]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 4
-; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
-; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    store i32 [[X]], ptr [[ARRAY]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 1
+; CHECK-NEXT:    store i32 [[X]], ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 2
+; CHECK-NEXT:    store i32 [[X]], ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY]], i64 3
+; CHECK-NEXT:    store i32 [[X]], ptr [[ARRAYIDX_3]], align 4
 ; CHECK-NEXT:    ret void
 ;
 entry:



More information about the llvm-commits mailing list