Experiment on speeding up offset computation in SHF_MERGE
Rafael Espíndola via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 24 15:59:28 PDT 2016
During BSDCan, Rui suggested an idea for improving the performance of
offset computation: instead of having relocations use section+offset,
always use a symbol. That would give us a place to cache the value.
The attached patches implement that. Unfortunately, the .o files produced
are substantially larger, and in the end there is a performance regression.
I didn't investigate it too much, but it looks like the net impact is
to just move cache misses from getOffset to getVA, and as we now use
more memory, there are more misses.
I have uploaded two reproducibles of a debug build with clang. One
uses section symbols, the other does not.
https://drive.google.com/open?id=0B7iRtublysV6U244RG1BYkE3dDg
https://drive.google.com/open?id=0B7iRtublysV6aktKWWViTzhCbDg
Another thing I tried was using a complete binary tree instead of a
binary search. While that shows promise, it hit code quality issues in
llvm pr30771, pr30772. A very hackish patch that also includes a
benchmark is attached too.
My current understanding is that the local algorithms we use for string
merging and offset computation are pretty efficient. What might still
be possible is:
* Reducing memory consumption somewhere else to improve cache hits.
* Not copying as much debug info (dwo, ideally single file dwo).
Cheers,
Rafael
-------------- next part --------------
diff --git a/ELF/Symbols.cpp b/ELF/Symbols.cpp
index 707143e..263828b 100644
--- a/ELF/Symbols.cpp
+++ b/ELF/Symbols.cpp
@@ -146,7 +146,11 @@ template <class ELFT> bool SymbolBody::hasThunk() const {
template <class ELFT> // Returns this symbol's address plus Addend, memoizing the address in the mutable VA member.
typename ELFT::uint SymbolBody::getVA(typename ELFT::uint Addend) const {
+ if (VA != (size_t)-1) // (size_t)-1 is VA's initial value in Symbols.h, i.e. "nothing cached yet"
+ return VA + Addend; // cache hit: getSymVA is skipped entirely
typename ELFT::uint OutVA = getSymVA<ELFT>(*this, Addend); // NOTE(review): getSymVA is not visible here; presumably its result can depend on Addend for some symbols -- confirm
+ if (!isSection()) // section symbols are never cached; presumably because their result depends on Addend -- confirm
+ VA = OutVA; // NOTE(review): VA is a size_t, which may be narrower than ELFT::uint on some hosts -- confirm
return OutVA + Addend;
}
diff --git a/ELF/Symbols.h b/ELF/Symbols.h
index 9c6da02..73afa2d 100644
--- a/ELF/Symbols.h
+++ b/ELF/Symbols.h
@@ -79,6 +79,7 @@ public:
uint8_t getVisibility() const { return StOther & 0x3; }
+
bool isInGot() const { return GotIndex != -1U; }
bool isInPlt() const { return PltIndex != -1U; }
template <class ELFT> bool hasThunk() const;
@@ -99,8 +100,12 @@ public:
unsigned DynsymIndex = 0;
uint32_t GotIndex = -1;
+
uint32_t GotPltIndex = -1;
uint32_t PltIndex = -1;
+
+ mutable size_t VA = -1; // Address cache filled in by getVA(); -1 (all bits set) means "not computed yet". Mutable because getVA() is a const member.
+
uint32_t GlobalDynIndex = -1;
protected:
-------------- next part --------------
diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp
index b115aab..c5040bb 100644
--- a/lib/MC/ELFObjectWriter.cpp
+++ b/lib/MC/ELFObjectWriter.cpp
@@ -574,6 +574,9 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
if (C != 0)
return true;
+ // FIXME:
+ return true;
+
// It looks like gold has a bug (http://sourceware.org/PR16794) and can
// only handle section relocations to mergeable sections if using RELA.
if (!hasRelocationAddend())
-------------- next part --------------
diff --git a/include/llvm/ADT/ArrayRef.h b/include/llvm/ADT/ArrayRef.h
index 1c90478..d2efeec 100644
--- a/include/llvm/ADT/ArrayRef.h
+++ b/include/llvm/ADT/ArrayRef.h
@@ -31,6 +31,7 @@ namespace llvm {
template<typename T>
class LLVM_NODISCARD ArrayRef {
public:
+ typedef T value_type;
typedef const T *iterator;
typedef const T *const_iterator;
typedef size_t size_type;
diff --git a/include/llvm/ADT/CompleteBinTree.h b/include/llvm/ADT/CompleteBinTree.h
new file mode 100644
index 0000000..54e98d9
--- /dev/null
+++ b/include/llvm/ADT/CompleteBinTree.h
@@ -0,0 +1,80 @@
+//===- llvm/ADT/CompleteBinTree.h -------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_COMPLETEBINTREE_H
+#define LLVM_ADT_COMPLETEBINTREE_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+#include <memory>
+
+namespace llvm {
+
+/// Fills \p Out, viewed as an implicit binary tree in which node \p J has its
+/// children at 2*J+1 and 2*J+2, by walking the subtree rooted at \p J in-order
+/// and storing successive elements read through \p I. \p I is taken by
+/// reference and advanced once per node written, so each recursive call
+/// continues where the previous one stopped.
+template <typename T>
+void createCompleteBinTreeAux(MutableArrayRef<T> Out,
+                              typename ArrayRef<T>::iterator &I, unsigned J) {
+  if (J < Out.size()) {
+    const unsigned Left = 2 * J + 1;
+    createCompleteBinTreeAux(Out, I, Left);
+    Out[J] = *I++;
+    createCompleteBinTreeAux(Out, I, Left + 1);
+  }
+}
+
+/// Allocates a 64-byte aligned buffer of V.size() + 1 elements and lays the
+/// elements of \p V out in it as an implicit binary tree. Slot 0 is left
+/// unwritten; slots 1..V.size() are filled by the recursive overload, so the
+/// children of slot I live in slots 2*I and 2*I+1. For sorted input the result
+/// is a binary search tree, e.g. {2,4,6,8,10} becomes {_,8,4,10,2,6}.
+///
+/// \returns the owning pointer to the buffer, or an empty pointer if the
+/// allocation fails.
+///
+/// NOTE(review): the buffer comes from posix_memalign(), which POSIX pairs
+/// with free(), yet std::unique_ptr<T> releases it with `delete`. Fixing that
+/// needs a custom deleter, i.e. a different return type, so it is only
+/// flagged here.
+template <typename T>
+std::unique_ptr<T> createCompleteBinTreeAux(ArrayRef<T> V) {
+  T *Ret = nullptr;
+  // posix_memalign() reports failure through its return value; bail out
+  // instead of writing through a pointer that was never set.
+  if (posix_memalign(reinterpret_cast<void **>(&Ret), 64,
+                     (V.size() + 1) * sizeof(T)) != 0)
+    return std::unique_ptr<T>();
+  // Skip slot 0 so that the 0-based child indices used by the recursive
+  // overload line up with the 1-based indices used by the search routines.
+  MutableArrayRef<T> RetRef(Ret + 1, V.size());
+  auto I = V.begin();
+  createCompleteBinTreeAux<T>(RetRef, I, 0);
+  return std::unique_ptr<T>(Ret);
+}
+
+/// Convenience entry point: builds the implicit-tree layout for any container
+/// \p V that exposes a value_type and converts to ArrayRef<value_type>.
+template <class Vec>
+std::unique_ptr<typename Vec::value_type> createCompleteBinTree(const Vec &V) {
+  typedef typename Vec::value_type Elem;
+  return createCompleteBinTreeAux<Elem>(V);
+}
+
+template <typename T, typename K, typename Func> // Walks the implicit tree in A (root at index 1, children of I at 2*I and 2*I+1; slot 0 unused) from root to leaf.
+size_t searchCompleteBinaryTreeAux(ArrayRef<T> A, const K V, Func Comp) { // Returns the index of the last node where Comp(V, node) was false, or A.size() if it was never false.
+ size_t I = 1;
+ size_t N = A.size();
+ size_t P2N = PowerOf2Floor(N); // presumably the largest power of two <= N (helper defined elsewhere) -- confirm
+ while (I < P2N) { // every I below P2N is also below N, so these levels index A without a bounds check
+ LLVM_PREFETCH(A.data() + 4 * I , 0, 3); // prefetch hint for slot 4*I, two levels below I; macro defined elsewhere
+ I = Comp(V, A[I]) ? 2*I : 2*I + 1; // Comp true -> left child, false -> right child
+ }
+
+ I = I < N ? I : I / 2; // if I ran past the end, step back to the parent so the next line recomputes the same I; otherwise the next line descends one more level
+ I = Comp(V, A[I]) ? 2 * I : 2 * I + 1; // NOTE(review): with only the unused slot present (N == 1) this reads A[0] -- confirm callers never hit that
+
+ size_t IP1 = I ;
+
+ size_t LastRightP1 = IP1 >> __builtin_ffsl(IP1); // the bits of I record the path (0 = left, 1 = right); dropping the trailing zeros and the lowest set bit gives the node of the last right turn. NOTE(review): __builtin_ffsl takes a long, which may be narrower than size_t on some targets
+ return LastRightP1 == 0 ? A.size() : LastRightP1; // 0 means only the root's leading 1 bit was set, i.e. no right turn was taken
+}
+
+/// Searches the implicit tree stored in \p A for \p V using \p Comp and returns
+/// an iterator to the slot chosen by searchCompleteBinaryTreeAux. When that
+/// helper returns A.size(), the result is A.begin() + A.size().
+template <typename Vec, typename K, typename Func>
+typename Vec::const_iterator
+searchCompleteBinaryTree(const Vec &A, const K V, Func Comp) {
+  typedef typename Vec::value_type Elem;
+  const size_t Index = searchCompleteBinaryTreeAux<Elem, K>(A, V, Comp);
+  return A.begin() + Index;
+}
+
+/// Overload of searchCompleteBinaryTree that orders keys with the `<`
+/// operator: the comparison performed at each node is `V < Element`.
+///
+/// The comparator is a lambda because built-in types such as int have no
+/// `operator<` function whose address could be passed instead.
+template <typename Vec, typename K>
+typename Vec::const_iterator searchCompleteBinaryTree(const Vec &A,
+                                                      const K V) {
+  typedef typename Vec::value_type Elem;
+  return searchCompleteBinaryTree<Vec, K>(
+      A, V, [](const K &Key, const Elem &E) { return Key < E; });
+}
+}
+
+#endif
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index 9e10e92..b715e11 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -10,6 +10,7 @@ set(ADTSources
BitmaskEnumTest.cpp
BitVectorTest.cpp
BumpPtrListTest.cpp
+ CompleteBinTree.cpp
DAGDeltaAlgorithmTest.cpp
DeltaAlgorithmTest.cpp
DenseMapTest.cpp
diff --git a/unittests/ADT/CompleteBinTree.cpp b/unittests/ADT/CompleteBinTree.cpp
new file mode 100644
index 0000000..a056239
--- /dev/null
+++ b/unittests/ADT/CompleteBinTree.cpp
@@ -0,0 +1,108 @@
+//===- llvm/unittest/ADT/CompleteBinTree.cpp ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/CompleteBinTree.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+template <class It, class T, class Compare> // Binary search over the non-empty range [First, Last); the Bench test has a (currently disabled) loop that uses it as the comparison baseline.
+__attribute__((noinline)) static It // noinline keeps this an out-of-line call
+fastUpperBound(It First, It Last, T Value, Compare Comp) { // Assuming the range is partitioned by Comp (false..., true...), returns the first position where Comp(Value, *pos) is true, or Last if there is none.
+ size_t Size = std::distance(First, Last);
+ assert(Size != 0); // an empty range would loop forever: H stays 0 and Size never reaches 1
+ while (Size != 1) {
+ size_t H = Size / 2;
+ LLVM_PREFETCH(&*(First + H / 2), 0, 3); // hint for roughly the next probe if First stays put
+ LLVM_PREFETCH(&*(First + H + H / 2), 0, 3); // hint for roughly the next probe if First advances by H
+ const It MI = First + H;
+ Size -= H;
+ First = Comp(Value, *MI) ? First : First + H; // written as a select expression rather than if/else; First moves to MI only when Comp is false
+ }
+ return Comp(Value, *First) ? First : First + 1; // one candidate left: the answer is it or the position just after it
+}
+
+TEST(CompleteBinTreeTest, Bench) { // Timing harness rather than a check: it contains no EXPECT/ASSERT calls.
+ srand(1); // fixed seed so every run sees the same data and queries
+ size_t N = 65536 + 10000; // number of elements and of queries
+ size_t Max = 3951113; // keys and queries are drawn from [0, Max]
+ typedef std::pair<size_t, size_t> Pair;
+ std::vector<Pair> Data;
+ std::vector<size_t> Query;
+ Data.reserve(N);
+ Query.reserve(N);
+ for (size_t I = 0; I < N; ++I) {
+ Query.push_back(rand() % (Max + 1));
+ Data.push_back(std::make_pair(rand() % (Max + 1), 0));
+ }
+ std::sort(Data.begin(), Data.end(), // the tree builder consumes elements in input order, so sort by key first
+ [](const Pair &A, const Pair &B) { return A.first < B.first; });
+
+ std::unique_ptr<Pair> TreeP = createCompleteBinTree(Data);
+ ArrayRef<Pair> Tree(TreeP.get(), Data.size()+1); // +1 because slot 0 of the buffer is unused
+
+ int Repeat = 1000; // passes over the whole query set
+
+ for (int I = 0; I < Repeat; ++I)
+ for (size_t Q : Query) {
+ auto Iter = searchCompleteBinaryTree(
+ Tree, Q, [](size_t Q, const Pair &P) { return Q < P.first; });
+ (void)Iter; // result intentionally unused
+ }
+
+ for (int I = 0; I < 0; ++I) // NOTE(review): the bound is 0, so this baseline loop never runs; presumably swapped by hand with Repeat when timing fastUpperBound -- confirm
+ for (size_t Q : Query) {
+ auto Iter =
+ fastUpperBound(Data.begin(), Data.end(), Q,
+ [](size_t V, const Pair &P) { return V < P.first; });
+ (void)Iter; // result intentionally unused
+ }
+}
+
+TEST(CompleteBinTreeTest, Basic) { // Checks the layout built by createCompleteBinTree and lookups of present and absent keys.
+ std::vector<int> Data{2, 4, 6, 8, 10};
+ std::unique_ptr<int> TreeP = createCompleteBinTree(Data);
+ ArrayRef<int> Tree(TreeP.get(), Data.size() + 1); // view includes the unused slot 0
+ std::vector<int> Expected{8, 4, 10, 2, 6}; // expected level-by-level layout of the sorted input
+ EXPECT_EQ(makeArrayRef(Expected), Tree.slice(1)); // slice(1) skips slot 0
+
+ EXPECT_EQ(2, *searchCompleteBinaryTree(Tree, 2)); // keys that are present are found exactly
+ EXPECT_EQ(4, *searchCompleteBinaryTree(Tree, 4));
+ EXPECT_EQ(6, *searchCompleteBinaryTree(Tree, 6));
+ EXPECT_EQ(8, *searchCompleteBinaryTree(Tree, 8));
+ EXPECT_EQ(10, *searchCompleteBinaryTree(Tree, 10));
+
+ EXPECT_EQ(Tree.end(), searchCompleteBinaryTree(Tree, 1)); // 1 is below every element: end() is returned
+ EXPECT_EQ(2, *searchCompleteBinaryTree(Tree, 3)); // absent keys resolve to the largest element below them
+ EXPECT_EQ(4, *searchCompleteBinaryTree(Tree, 5));
+ EXPECT_EQ(6, *searchCompleteBinaryTree(Tree, 7));
+ EXPECT_EQ(8, *searchCompleteBinaryTree(Tree, 9));
+ EXPECT_EQ(10, *searchCompleteBinaryTree(Tree, 11));
+
+ const std::vector<int> &CTree = Tree; // repeats one lookup through a std::vector obtained from the ArrayRef
+ EXPECT_EQ(2, *searchCompleteBinaryTree(CTree, 2));
+}
+
+TEST(CompleteBinTreeTest, DifferentKey) { // Checks lookup when the key type (int) differs from the element type (pair<int, char>).
+ std::vector<std::pair<int, char>> Data{
+ {2, 'a'}, {4, 'b'}, {6, 'c'}, {8, 'd'}};
+ std::unique_ptr<std::pair<int, char>> TreeP = createCompleteBinTree(Data);
+ ArrayRef<std::pair<int, char>> Tree(TreeP.get(), Data.size()+1); // view includes the unused slot 0
+ std::vector<std::pair<int, char>> Expected{ // expected level-by-level layout of the sorted input
+ {6, 'c'}, {4, 'b'}, {8, 'd'}, {2, 'a'}};
+ EXPECT_EQ(makeArrayRef(Expected), Tree.slice(1)); // slice(1) skips slot 0
+
+ auto I = searchCompleteBinaryTree(
+ Tree, 3,
+ [](int A, const std::pair<int, char> &B) { return A < B.first; }); // compares the int key with the pair's first member only
+ EXPECT_EQ(std::make_pair(2, 'a'), *I); // 3 is absent; the largest key below it is 2
+}
+}
More information about the llvm-commits
mailing list