[llvm] r305882 - [SCEV] Make MulOpsInlineThreshold lower to avoid excessive compilation time

Wed Jun 21 00:28:13 PDT 2017

Author: mkazantsev
Date: Wed Jun 21 02:28:13 2017
New Revision: 305882

URL: http://llvm.org/viewvc/llvm-project?rev=305882&view=rev
Log:
[SCEV] Make MulOpsInlineThreshold lower to avoid excessive compilation time

The MulOpsInlineThreshold option of SCEV defaults to 1000, which is excessively high.
When constructing SCEVs of expressions like:

  x1 = a * a
  x2 = x1 * x1
  x3 = x2 * x2
    ...

we actually get huge SCEVs with the maximum allowed number of operands inlined.
Such expressions are easy to get from unrolling loops that look like:

  x = a
  for (i = 0; i < n; i++)
    x = x * x

or from trickier cases where large powers are involved. If some non-linear analysis
tries to work with a SCEV that has 1000 operands, it may lead to excessively long
compilation. The attached test does not finish within 1 minute with the default threshold.

This patch decreases the default value to 32, which looks much more reasonable if we
use analyses with complexity O(N^2) or O(N^3) working with SCEVs.

Differential Revision: https://reviews.llvm.org/D34397

Added:
    llvm/trunk/test/Transforms/IndVarSimplify/huge_muls.ll
Modified:
    llvm/trunk/lib/Analysis/ScalarEvolution.cpp

Modified: llvm/trunk/lib/Analysis/ScalarEvolution.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/ScalarEvolution.cpp?rev=305882&r1=305881&r2=305882&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/ScalarEvolution.cpp (original)
+++ llvm/trunk/lib/Analysis/ScalarEvolution.cpp Wed Jun 21 02:28:13 2017
@@ -126,7 +126,7 @@ static cl::opt<bool>
 static cl::opt<unsigned> MulOpsInlineThreshold(
     "scev-mulops-inline-threshold", cl::Hidden,
     cl::desc("Threshold for inlining multiplication operands into a SCEV"),
-    cl::init(1000));
+    cl::init(32));
 
 static cl::opt<unsigned> AddOpsInlineThreshold(
     "scev-addops-inline-threshold", cl::Hidden,

Added: llvm/trunk/test/Transforms/IndVarSimplify/huge_muls.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/huge_muls.ll?rev=305882&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/huge_muls.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/huge_muls.ll Wed Jun 21 02:28:13 2017
@@ -0,0 +1,87 @@
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test takes an excessively long time if SCEV tries to construct huge
+; SCEVMulExprs (with ~1000 ops) due to non-linear analysis cost.
+define i32 @test() {
+; CHECK-LABEL: @test(
+bci_0:
+  br label %bci_12
+
+bci_133:                                          ; preds = %bci_127.unr-lcssa
+  ret i32 %tmp17
+
+bci_12:                                           ; preds = %bci_127.unr-lcssa, %bci_0
+  %indvars.iv184 = phi i64 [ %indvars.iv.next185, %bci_127.unr-lcssa ], [ 3, %bci_0 ]
+  %tmp1 = trunc i64 %indvars.iv184 to i32
+  br label %bci_55.postloop
+
+bci_127.unr-lcssa:                                ; preds = %bci_90.postloop
+  %indvars.iv.next185 = add nuw nsw i64 %indvars.iv184, 1
+  %tmp4 = icmp sgt i64 %indvars.iv184, 91
+  br i1 %tmp4, label %bci_133, label %bci_12
+
+bci_55.postloop:                                  ; preds = %bci_90.postloop, %bci_12
+  %indvars.iv180.postloop = phi i64 [ %indvars.iv.next181.postloop, %bci_90.postloop ], [ 15, %bci_12 ]
+  %local_2_16.postloop = phi i32 [ %tmp17, %bci_90.postloop ], [ 4, %bci_12 ]
+  %indvars.iv.next181.postloop = add nuw nsw i64 %indvars.iv180.postloop, 1
+  %tmp6 = load i32, i32 addrspace(1)* undef, align 4
+  %tmp7 = mul i32 %tmp6, %tmp1
+  br label %not_zero65.us.postloop
+
+not_zero65.us.postloop:                           ; preds = %not_zero65.us.postloop.1, %bci_55.postloop
+  %local_2_24.us.postloop = phi i32 [ %local_2_16.postloop, %bci_55.postloop ], [ %tmp49, %not_zero65.us.postloop.1 ]
+  %local_6_.us.postloop = phi i32 [ 3, %bci_55.postloop ], [ %tmp50, %not_zero65.us.postloop.1 ]
+  %tmp8 = mul i32 %tmp7, %local_2_24.us.postloop
+  %tmp9 = mul i32 %tmp8, %local_2_24.us.postloop
+  %tmp10 = mul i32 %tmp7, %tmp9
+  %tmp11 = mul i32 %tmp10, %tmp9
+  %tmp12 = mul i32 %tmp7, %tmp11
+  %tmp13 = mul i32 %tmp12, %tmp11
+  %tmp14 = mul i32 %tmp7, %tmp13
+  %tmp15 = mul i32 %tmp14, %tmp13
+  %tmp16 = mul i32 %tmp7, %tmp15
+  %tmp17 = mul i32 %tmp16, %tmp15
+  %tmp18 = icmp sgt i32 %local_6_.us.postloop, 82
+  br i1 %tmp18, label %bci_90.postloop, label %not_zero65.us.postloop.1
+
+bci_90.postloop:                                  ; preds = %not_zero65.us.postloop
+  %tmp19 = icmp sgt i64 %indvars.iv180.postloop, 68
+  br i1 %tmp19, label %bci_127.unr-lcssa, label %bci_55.postloop
+
+not_zero65.us.postloop.1:                         ; preds = %not_zero65.us.postloop
+  %tmp20 = mul i32 %tmp7, %tmp17
+  %tmp21 = mul i32 %tmp20, %tmp17
+  %tmp22 = mul i32 %tmp7, %tmp21
+  %tmp23 = mul i32 %tmp22, %tmp21
+  %tmp24 = mul i32 %tmp7, %tmp23
+  %tmp25 = mul i32 %tmp24, %tmp23
+  %tmp26 = mul i32 %tmp7, %tmp25
+  %tmp27 = mul i32 %tmp26, %tmp25
+  %tmp28 = mul i32 %tmp7, %tmp27
+  %tmp29 = mul i32 %tmp28, %tmp27
+  %tmp30 = mul i32 %tmp7, %tmp29
+  %tmp31 = mul i32 %tmp30, %tmp29
+  %tmp32 = mul i32 %tmp7, %tmp31
+  %tmp33 = mul i32 %tmp32, %tmp31
+  %tmp34 = mul i32 %tmp7, %tmp33
+  %tmp35 = mul i32 %tmp34, %tmp33
+  %tmp36 = mul i32 %tmp7, %tmp35
+  %tmp37 = mul i32 %tmp36, %tmp35
+  %tmp38 = mul i32 %tmp7, %tmp37
+  %tmp39 = mul i32 %tmp38, %tmp37
+  %tmp40 = mul i32 %tmp7, %tmp39
+  %tmp41 = mul i32 %tmp40, %tmp39
+  %tmp42 = mul i32 %tmp7, %tmp41
+  %tmp43 = mul i32 %tmp42, %tmp41
+  %tmp44 = mul i32 %tmp7, %tmp43
+  %tmp45 = mul i32 %tmp44, %tmp43
+  %tmp46 = mul i32 %tmp7, %tmp45
+  %tmp47 = mul i32 %tmp46, %tmp45
+  %tmp48 = mul i32 %tmp7, %tmp47
+  %tmp49 = mul i32 %tmp48, %tmp47
+  %tmp50 = add nsw i32 %local_6_.us.postloop, 20
+  br label %not_zero65.us.postloop
+}