[llvm] 756af2f - [SCEV] SCEVExpander::isHighCostExpansionHelper(): cost-model add/mul

Tue Feb 25 12:07:15 PST 2020

Author: Roman Lebedev
Date: 2020-02-25T23:05:58+03:00
New Revision: 756af2f88bda16a70df200283f833227f336822a

URL: https://github.com/llvm/llvm-project/commit/756af2f88bda16a70df200283f833227f336822a
DIFF: https://github.com/llvm/llvm-project/commit/756af2f88bda16a70df200283f833227f336822a.diff

LOG: [SCEV] SCEVExpander::isHighCostExpansionHelper(): cost-model add/mul

Summary:
While this resolves the regression from D73722 in `llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll`,
it now regresses the `@nestedIV` test in `llvm/test/Transforms/IndVarSimplify/elim-extend.ll`:
we can no longer perform that expansion within the default budget of `4`, but require a budget of `6`.
That regression is being addressed by D73777.

The basic idea here is simple.
```
Op0,  Op1, Op2 ...
 |     |    |
 \--+--/    |
    |       |
    \---+---/
```
I.e. given N operands, we will have N-1 operations,
so we have to add the cost of an add (mul) for **every** Op processed
**except** the first one, plus we need to recurse into *every* Op.

I'm guessing there's already canonicalization that ensures we won't
have `1` operand in `scMulExpr`, and no `0` in `scAddExpr`/`scMulExpr`.

Reviewers: reames, mkazantsev, wmi, sanjoy

Reviewed By: mkazantsev

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D73728

Added: 
    

Modified: 
    llvm/lib/Analysis/ScalarEvolutionExpander.cpp
    llvm/test/Transforms/IndVarSimplify/elim-extend.ll
    llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index abfb784f69f2..30634f70229c 100644

--- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -2219,6 +2219,40 @@ bool SCEVExpander::isHighCostExpansionHelper(
                                      TTI, Processed);
   }
 
+  if (S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr) {
+    const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S);
+
+    unsigned Opcode;
+    switch (S->getSCEVType()) {
+    case scAddExpr:
+      Opcode = Instruction::Add;
+      break;
+    case scMulExpr:
+      Opcode = Instruction::Mul;
+      break;
+    default:
+      llvm_unreachable("There are no other variants here.");
+    }
+
+    Type *OpType = NAry->getType();
+    int PairCost = TTI.getOperationCost(Opcode, OpType);
+    // TODO: this is a very pessimistic cost modelling for Mul,
+    // because of Bin Pow algorithm actually used by the expander,
+    // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
+
+    assert(NAry->getNumOperands() > 1 &&
+           "Nary expr should have more than 1 operand.");
+    for (const SCEV *Op : NAry->operands()) {
+      if (isHighCostExpansionHelper(Op, L, At, BudgetRemaining, TTI, Processed))
+        return true;
+      if (Op == *NAry->op_begin())
+        continue;
+      BudgetRemaining -= PairCost;
+    }
+
+    return BudgetRemaining < 0;
+  }
+
   // HowManyLessThans uses a Max expression whenever the loop is not guarded by
   // the exit condition.
   if (isa<SCEVMinMaxExpr>(S))

diff  --git a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
index 809ea9bec4d9..fec5ef48f4e1 100644
--- a/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/llvm/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -135,8 +135,8 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind {
 ; CHECK-NEXT:    store i8 0, i8* [[ADR2]]
 ; CHECK-NEXT:    [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
 ; CHECK-NEXT:    store i8 0, i8* [[ADR3]]
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
-; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
+; CHECK-NEXT:    [[INNERCMP:%.*]] = icmp sgt i64 [[TMP0]], [[INDVARS_IV_NEXT]]
+; CHECK-NEXT:    br i1 [[INNERCMP]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
 ; CHECK:       innerexit:
 ; CHECK-NEXT:    [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32

diff  --git a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll
index 08b3d62e518c..93cced597cac 100644
--- a/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll
+++ b/llvm/test/Transforms/IndVarSimplify/exit_value_test2.ll
@@ -19,9 +19,6 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
 ; CHECK-NEXT:    [[CMP8:%.*]] = icmp ugt i32 [[LEN:%.*]], 11
 ; CHECK-NEXT:    br i1 [[CMP8]], label [[WHILE_BODY_LR_PH:%.*]], label [[WHILE_END:%.*]]
 ; CHECK:       while.body.lr.ph:
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], -12
-; CHECK-NEXT:    [[TMP1:%.*]] = udiv i32 [[TMP0]], 12
-; CHECK-NEXT:    [[TMP2:%.*]] = mul i32 [[TMP1]], 12
 ; CHECK-NEXT:    br label [[WHILE_BODY:%.*]]
 ; CHECK:       while.body:
 ; CHECK-NEXT:    [[KEYLEN_010:%.*]] = phi i32 [ [[LEN]], [[WHILE_BODY_LR_PH]] ], [ [[SUB:%.*]], [[WHILE_BODY]] ]
@@ -39,10 +36,10 @@ define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[SUB]], 11
 ; CHECK-NEXT:    br i1 [[CMP]], label [[WHILE_BODY]], label [[WHILE_COND_WHILE_END_CRIT_EDGE:%.*]]
 ; CHECK:       while.cond.while.end_crit_edge:
-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 [[TMP0]], [[TMP2]]
+; CHECK-NEXT:    [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[WHILE_BODY]] ]
 ; CHECK-NEXT:    br label [[WHILE_END]]
 ; CHECK:       while.end:
-; CHECK-NEXT:    [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[TMP3]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[KEYLEN_0_LCSSA:%.*]] = phi i32 [ [[SUB_LCSSA]], [[WHILE_COND_WHILE_END_CRIT_EDGE]] ], [ [[LEN]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    call void @_Z3mixRjj(i32* dereferenceable(4) [[A]], i32 [[KEYLEN_0_LCSSA]])
 ; CHECK-NEXT:    [[T4:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @llvm.lifetime.end.p0i8(i64 4, i8* [[T]])