[PATCH] D66528: make MaxDepth in value tracking configurable
Dun via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 21 06:04:43 PDT 2019
cjld created this revision.
cjld added a reviewer: MaskRay.
Herald added subscribers: llvm-commits, hiraditya.
Herald added a project: LLVM.
Make MaxDepth in value tracking configurable.
A small MaxDepth can result in low-performance code; an example is shown below:
// b.cc
#include <cstddef>
#include <stdint.h>
typedef long long index;
extern "C" index g_tid;
extern "C" index g_num;
// Reproducer: stores c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2] over a three-level
// loop nest whose bounds are derived from the constants n, m, k and from the
// external globals g_tid / g_num.
void add3(float* __restrict__ a, float* __restrict__ b, float* __restrict__ c) {
index n = 64*1024;
index m = 16*1024;
index k = 4*1024;
// Copy the external globals into locals before entering the loops.
index tid = g_tid;
index num = g_num;
// 32-byte alignment hints for all three pointers.
// NOTE(review): the builtin's return value is discarded here; GCC documents
// the alignment assumption as applying to the returned pointer. Confirm that
// relying on the argument itself is intended for this reproducer.
__builtin_assume_aligned(a, 32);
__builtin_assume_aligned(b, 32);
__builtin_assume_aligned(c, 32);
// i0 starts at tid*k and advances by num*k while it is below m.
for (index i0=tid*k; i0<m; i0+=num*k)
// i1 takes the values 0, m, 2*m, ... while it is below n*m.
for (index i1=0; i1<n*m; i1+=m)
// i2 covers the k consecutive elements of the current chunk.
for (index i2=0; i2<k; i2++)
c[i1+i0+i2] = b[i0+i2] + a[i1+i0+i2];
}
Compile with `clang ./b.cc -Ofast -march=native -std=c++14 -S -o b.s` (Intel i7-7500U),
which yields:
// b.s
......
vmovaps -224(%rdi,%rbx,4), %ymm0
vmovups -192(%rdi,%rbx,4), %ymm1
vmovups -160(%rdi,%rbx,4), %ymm2
vmovups -128(%rdi,%rbx,4), %ymm3
vaddps -224(%rsi,%rbx,4), %ymm0, %ymm0
vaddps -192(%rsi,%rbx,4), %ymm1, %ymm1
vaddps -160(%rsi,%rbx,4), %ymm2, %ymm2
vaddps -128(%rsi,%rbx,4), %ymm3, %ymm3
vmovaps %ymm0, -224(%rdx,%rbx,4)
vmovups %ymm1, -192(%rdx,%rbx,4)
vmovups %ymm2, -160(%rdx,%rbx,4)
vmovups %ymm3, -128(%rdx,%rbx,4)
......
Expected (aligned `vmovaps` for every load and store, instead of the unaligned `vmovups` above):
// b.s
......
vmovaps -224(%rdi,%rbx,4), %ymm0
vmovaps -192(%rdi,%rbx,4), %ymm1
vmovaps -160(%rdi,%rbx,4), %ymm2
vmovaps -128(%rdi,%rbx,4), %ymm3
vaddps -224(%rsi,%rbx,4), %ymm0, %ymm0
vaddps -192(%rsi,%rbx,4), %ymm1, %ymm1
vaddps -160(%rsi,%rbx,4), %ymm2, %ymm2
vaddps -128(%rsi,%rbx,4), %ymm3, %ymm3
vmovaps %ymm0, -224(%rdx,%rbx,4)
vmovaps %ymm1, -192(%rdx,%rbx,4)
vmovaps %ymm2, -160(%rdx,%rbx,4)
vmovaps %ymm3, -128(%rdx,%rbx,4)
......
This is because MaxDepth is too small, so LLVM is unable to compute the alignment info. Compiling with `clang ./b.cc -Ofast -march=native -std=c++14 -mllvm -value-tracking-max-depth=10 -S -o b.s` produces the expected asm code.
Repository:
rL LLVM
https://reviews.llvm.org/D66528
Files:
llvm/lib/Analysis/ValueTracking.cpp
Index: llvm/lib/Analysis/ValueTracking.cpp
===================================================================
--- llvm/lib/Analysis/ValueTracking.cpp
+++ llvm/lib/Analysis/ValueTracking.cpp
@@ -66,7 +66,7 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
-#include <array>
+#include <vector>
#include <cassert>
#include <cstdint>
#include <iterator>
@@ -75,7 +75,8 @@
using namespace llvm;
using namespace llvm::PatternMatch;
-const unsigned MaxDepth = 6;
+static cl::opt<unsigned> MaxDepth("value-tracking-max-depth",
+ cl::Hidden, cl::init(6));
// Controls the number of uses of the value searched for possible
// dominating comparisons.
@@ -115,31 +116,27 @@
/// bits in x, etc. Regarding the mutual recursion, computeKnownBits can call
/// isKnownNonZero, which calls computeKnownBits and isKnownToBeAPowerOfTwo
/// (all of which can call computeKnownBits), and so on.
- std::array<const Value *, MaxDepth> Excluded;
+ std::vector<const Value *> Excluded;
/// If true, it is safe to use metadata during simplification.
InstrInfoQuery IIQ;
- unsigned NumExcluded = 0;
-
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT, bool UseInstrInfo,
OptimizationRemarkEmitter *ORE = nullptr)
- : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {}
+ : DL(DL), AC(AC), CxtI(CxtI), DT(DT), ORE(ORE), IIQ(UseInstrInfo) {
+ Excluded.reserve(MaxDepth);
+ }
Query(const Query &Q, const Value *NewExcl)
- : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ),
- NumExcluded(Q.NumExcluded) {
+ : DL(Q.DL), AC(Q.AC), CxtI(Q.CxtI), DT(Q.DT), ORE(Q.ORE), IIQ(Q.IIQ) {
+ Excluded.reserve(MaxDepth);
Excluded = Q.Excluded;
- Excluded[NumExcluded++] = NewExcl;
- assert(NumExcluded <= Excluded.size());
+ Excluded.push_back(NewExcl);
}
bool isExcluded(const Value *Value) const {
- if (NumExcluded == 0)
- return false;
- auto End = Excluded.begin() + NumExcluded;
- return std::find(Excluded.begin(), End, Value) != End;
+ return std::find(Excluded.begin(), Excluded.end(), Value) != Excluded.end();
}
};
@@ -2662,7 +2659,6 @@
/// through SExt instructions only if LookThroughSExt is true.
bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
bool LookThroughSExt, unsigned Depth) {
- const unsigned MaxDepth = 6;
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D66528.216380.patch
Type: text/x-patch
Size: 2627 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20190821/2cbc2b1a/attachment.bin>
More information about the llvm-commits
mailing list