[PATCH] D66528: make MaxDepth in value tracking configurable

Wed Aug 21 06:04:43 PDT 2019

cjld created this revision.
cjld added a reviewer: MaskRay.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.

Make MaxDepth in value tracking configurable.

A small MaxDepth can cause LLVM to generate poorly performing code. An example is shown below:

  // b.cc
  #include <cstddef>
  #include <stdint.h>
  
  typedef long long index;
  
  extern "C" index g_tid;
  extern "C" index g_num;
  
  
  // Reproducer: for every (i0, i1, i2) visited by the loop nest below, stores
  // b[i0+i2] + a[i1+i0+i2] into c[i1+i0+i2]. The three pointers are declared
  // __restrict__ and each is passed to __builtin_assume_aligned with an
  // alignment of 32.
  void add3(float* __restrict__ a, float* __restrict__ b, float* __restrict__ c) {
      // Fixed extents: n = 65536, m = 16384, k = 4096.
      index n = 64*1024;
      index m = 16*1024;
      index k = 4*1024;
      // Start offset and stride of the outer loop come from external globals.
      index tid = g_tid;
      index num = g_num;
      // NOTE(review): the builtin's return value is discarded here. GCC documents
      // the alignment assumption as applying to the returned pointer — confirm
      // the hint has the intended effect with the compiler in use.
      __builtin_assume_aligned(a, 32);
      __builtin_assume_aligned(b, 32);
      __builtin_assume_aligned(c, 32);
      // i0 starts at tid*k and advances by num*k, so it is always a multiple of k.
      for (index i0=tid*k; i0<m; i0+=num*k)
          // i1 starts at 0 and advances by m, so it is always a multiple of m.
          for (index i1=0; i1<n*m; i1+=m)
              // Innermost loop walks k consecutive elements from the base offsets
              // i0 (for b) and i1+i0 (for a and c).
              for (index i2=0; i2<k; i2++)
                  c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2];
  }

Compiling with `clang ./b.cc -Ofast -march=native -std=c++14 -S -o b.s` (on an Intel i7-7500U) yields:

  // b.s
  ......
  	vmovaps	-224(%rdi,%rbx,4), %ymm0
  	vmovups	-192(%rdi,%rbx,4), %ymm1
  	vmovups	-160(%rdi,%rbx,4), %ymm2
  	vmovups	-128(%rdi,%rbx,4), %ymm3
  	vaddps	-224(%rsi,%rbx,4), %ymm0, %ymm0
  	vaddps	-192(%rsi,%rbx,4), %ymm1, %ymm1
  	vaddps	-160(%rsi,%rbx,4), %ymm2, %ymm2
  	vaddps	-128(%rsi,%rbx,4), %ymm3, %ymm3
  	vmovaps	%ymm0, -224(%rdx,%rbx,4)
  	vmovups	%ymm1, -192(%rdx,%rbx,4)
  	vmovups	%ymm2, -160(%rdx,%rbx,4)
  	vmovups	%ymm3, -128(%rdx,%rbx,4)
  ......

Expected output:

  // b.s
  ......
  	vmovaps	-224(%rdi,%rbx,4), %ymm0
  	vmovaps	-192(%rdi,%rbx,4), %ymm1
  	vmovaps	-160(%rdi,%rbx,4), %ymm2
  	vmovaps	-128(%rdi,%rbx,4), %ymm3
  	vaddps	-224(%rsi,%rbx,4), %ymm0, %ymm0
  	vaddps	-192(%rsi,%rbx,4), %ymm1, %ymm1
  	vaddps	-160(%rsi,%rbx,4), %ymm2, %ymm2
  	vaddps	-128(%rsi,%rbx,4), %ymm3, %ymm3
  	vmovaps	%ymm0, -224(%rdx,%rbx,4)
  	vmovaps	%ymm1, -192(%rdx,%rbx,4)
  	vmovaps	%ymm2, -160(%rdx,%rbx,4)
  	vmovaps	%ymm3, -128(%rdx,%rbx,4)
  ......

This happens because MaxDepth is too small, so LLVM is unable to compute the alignment information. Compiling with `clang ./b.cc -Ofast -march=native -std=c++14 -mllvm -value-tracking-max-depth=10 -S -o b.s` produces the expected assembly.


Repository:
  rL LLVM

https://reviews.llvm.org/D66528

Files:
  llvm/lib/Analysis/ValueTracking.cpp


Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================

--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -66,7 +66,7 @@
 #include "llvm/Support/KnownBits.h"
 #include "llvm/Support/MathExtras.h"
 #include <algorithm>
-#include <array>
+#include <vector>
 #include <cassert>
 #include <cstdint>
 #include <iterator>
@@ -75,7 +75,8 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
-const unsigned MaxDepth = 6;
+static cl::opt<unsigned> MaxDepth("value-tracking-max-depth",
+                                  cl::Hidden, cl::init(6));
 
 // Controls the number of uses of the value searched for possible
 // dominating comparisons.
@@ -115,31 +116,27 @@
   /// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
   /// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
   /// (all of which can call computeKnownBits), and so on.
-  std::array<const Value *, MaxDepth> Excluded;
+  std::vector<const Value *> Excluded;
 
   /// If true, it is safe to use metadata during simplification.
   InstrInfoQuery IIQ;
 
-  unsigned NumExcluded = 0;
-
   Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
         const DominatorTree *DT, bool UseInstrInfo,
         OptimizationRemarkEmitter *ORE = nullptr)
-      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
+      : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {
+    Excluded.reserve(MaxDepth);
+  }
 
   Query(const Query &Q, const Value *NewExcl)
-      : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
-        NumExcluded(Q.NumExcluded) {
+      : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ) {
+    Excluded.reserve(MaxDepth);
     Excluded = Q.Excluded;
-    Excluded[NumExcluded++] = NewExcl;
-    assert(NumExcluded <= Excluded.size());
+    Excluded.push_back(NewExcl);
   }
 
   bool isExcluded(const Value *Value) const {
-    if (NumExcluded == 0)
-      return false;
-    auto End = Excluded.begin() + NumExcluded;
-    return std::find(Excluded.begin(), End, Value) != End;
+    return std::find(Excluded.begin(), Excluded.end(), Value) != Excluded.end();
   }
 };
 
@@ -2662,7 +2659,6 @@
 /// through SExt instructions only if LookThroughSExt is true.
 bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
                            bool LookThroughSExt, unsigned Depth) {
-  const unsigned MaxDepth = 6;
 
   assert(V && "No Value?");
   assert(Depth <= MaxDepth && "Limit Search Depth");


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66528.216380.patch
Type: text/x-patch
Size: 2627 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190821/2cbc2b1a/attachment.bin>