[llvm] [GlobalISel] Optimize ULEB128 usage (PR #90565)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 30 06:38:14 PDT 2024


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/90565

>From 03bc09d492da23142a197d999d200fd58161b102 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 30 Apr 2024 08:06:15 +0200
Subject: [PATCH 1/4] [GlobalISel] Optimize ULEB128 usage

- Remove some cases where ULEB128 isn't needed
- Add a fastDecodeULEB128 tailored for GlobalISel which does unchecked decoding optimized for the common case, which is 1 byte values.

Previous LEB128 decode function was about 45 instructions on X86 assuming it got inlined for our use case & the error checks got optimized out. This "fast" one is 23 instruction.
---
 .../CodeGen/GlobalISel/GIMatchTableExecutor.h | 25 +++++++++-
 .../GlobalISel/GIMatchTableExecutorImpl.h     | 15 +++---
 .../CodeGen/GlobalISel/CMakeLists.txt         |  1 +
 .../GlobalISel/GIMatchTableExecutorTest.cpp   | 49 +++++++++++++++++++
 .../GlobalISel/GlobalISelMatchTable.cpp       |  5 +-
 5 files changed, 82 insertions(+), 13 deletions(-)
 create mode 100644 llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 8eddc6a6a531b4..1086b1e89f1340 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -168,7 +168,7 @@ enum {
   /// operand.
   /// - InsnID(ULEB128) - Instruction ID
   /// - MMOIdx(ULEB128) - MMO index
-  /// - NumAddrSpace(ULEB128) - Number of valid address spaces
+  /// - NumAddrSpace(1) - Number of valid address spaces
   /// - AddrSpaceN(ULEB128) - An allowed space of the memory access
   /// - AddrSpaceN+1 ...
   GIM_CheckMemoryAddressSpace,
@@ -177,7 +177,7 @@ enum {
   /// memory operand.
   /// - InsnID(ULEB128) - Instruction ID
   /// - MMOIdx(ULEB128) - MMO index
-  /// - MinAlign(ULEB128) - Minimum acceptable alignment
+  /// - MinAlign(1) - Minimum acceptable alignment
   GIM_CheckMemoryAlignment,
 
   /// Check the size of the memory access for the given machine memory operand
@@ -713,6 +713,27 @@ class GIMatchTableExecutor {
     memcpy(&Ret, MatchTable, sizeof(Ret));
     return Ret;
   }
+
+public:
+  // Faster ULEB128 decoder tailored for the Match Table Executor.
+  //
+  // - Arguments are fixed to avoid mid-function checks.
+  // - Unchecked execution, assumes no error.
+  // - Fast common case handling (1 byte values).
+  LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+  fastDecodeULEB128(const uint8_t *__restrict MatchTable,
+                    uint64_t &CurrentIdx) {
+    uint64_t Value = MatchTable[CurrentIdx] & 0x7f;
+    if (LLVM_UNLIKELY(MatchTable[CurrentIdx++] >= 128)) {
+      unsigned Shift = 7;
+      do {
+        uint64_t Slice = MatchTable[CurrentIdx] & 0x7f;
+        Value += Slice << Shift;
+        Shift += 7;
+      } while (MatchTable[CurrentIdx++] >= 128);
+    }
+    return Value;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index dec2d97bb1fa7e..4d147bf20c26a5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -100,10 +100,7 @@ bool GIMatchTableExecutor::executeMatchTable(
   };
 
   const auto readULEB = [&]() {
-    unsigned N = 0;
-    uint64_t Val = decodeULEB128(MatchTable + CurrentIdx, &N);
-    CurrentIdx += N;
-    return Val;
+    return fastDecodeULEB128(MatchTable, CurrentIdx);
   };
 
   // Convenience function to return a signed value. This avoids
@@ -476,7 +473,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     }
     case GIM_CheckAtomicOrdering: {
       uint64_t InsnID = readULEB();
-      auto Ordering = (AtomicOrdering)readULEB();
+      auto Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtExecutor::getName(),
                       dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs["
                              << InsnID << "], " << (uint64_t)Ordering << ")\n");
@@ -493,7 +490,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     }
     case GIM_CheckAtomicOrderingOrStrongerThan: {
       uint64_t InsnID = readULEB();
-      auto Ordering = (AtomicOrdering)readULEB();
+      auto Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtExecutor::getName(),
                       dbgs() << CurrentIdx
                              << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs["
@@ -511,7 +508,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     }
     case GIM_CheckAtomicOrderingWeakerThan: {
       uint64_t InsnID = readULEB();
-      auto Ordering = (AtomicOrdering)readULEB();
+      auto Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtExecutor::getName(),
                       dbgs() << CurrentIdx
                              << ": GIM_CheckAtomicOrderingWeakerThan(MIs["
@@ -531,7 +528,7 @@ bool GIMatchTableExecutor::executeMatchTable(
       uint64_t InsnID = readULEB();
       uint64_t MMOIdx = readULEB();
       // This accepts a list of possible address spaces.
-      const uint64_t NumAddrSpace = readULEB();
+      const uint64_t NumAddrSpace = MatchTable[CurrentIdx++];
 
       if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
         if (handleReject() == RejectAndGiveUp)
@@ -568,7 +565,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     case GIM_CheckMemoryAlignment: {
       uint64_t InsnID = readULEB();
       uint64_t MMOIdx = readULEB();
-      uint64_t MinAlign = readULEB();
+      uint64_t MinAlign = MatchTable[CurrentIdx++];
 
       assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
 
diff --git a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
index 6ed2409f2ad755..111d7f4d2f6200 100644
--- a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_unittest(GlobalISelTests
   ConstantFoldingTest.cpp
   CSETest.cpp
+  GIMatchTableExecutorTest.cpp
   LegalizerTest.cpp
   LegalizerHelperTest.cpp
   LegalizerInfoTest.cpp
diff --git a/llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp
new file mode 100644
index 00000000000000..5a811d79592880
--- /dev/null
+++ b/llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp
@@ -0,0 +1,49 @@
+//===- GIMatchTableExecutorTest.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(GlobalISelLEB128Test, fastDecodeULEB128) {
+#define EXPECT_DECODE_ULEB128_EQ(EXPECTED, VALUE)                              \
+  do {                                                                         \
+    uint64_t ActualSize = 0;                                                   \
+    uint64_t Actual = GIMatchTableExecutor::fastDecodeULEB128(                 \
+        reinterpret_cast<const uint8_t *>(VALUE), ActualSize);                 \
+    EXPECT_EQ(sizeof(VALUE) - 1, ActualSize);                                  \
+    EXPECT_EQ(EXPECTED, Actual);                                               \
+  } while (0)
+
+  EXPECT_DECODE_ULEB128_EQ(0u, "\x00");
+  EXPECT_DECODE_ULEB128_EQ(1u, "\x01");
+  EXPECT_DECODE_ULEB128_EQ(63u, "\x3f");
+  EXPECT_DECODE_ULEB128_EQ(64u, "\x40");
+  EXPECT_DECODE_ULEB128_EQ(0x7fu, "\x7f");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x01");
+  EXPECT_DECODE_ULEB128_EQ(0x81u, "\x81\x01");
+  EXPECT_DECODE_ULEB128_EQ(0x90u, "\x90\x01");
+  EXPECT_DECODE_ULEB128_EQ(0xffu, "\xff\x01");
+  EXPECT_DECODE_ULEB128_EQ(0x100u, "\x80\x02");
+  EXPECT_DECODE_ULEB128_EQ(0x101u, "\x81\x02");
+  EXPECT_DECODE_ULEB128_EQ(4294975616ULL, "\x80\xc1\x80\x80\x10");
+
+  // Decode ULEB128 with extra padding bytes
+  EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x80\x80\x80\x80\x80\x80\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80000000'00000000ul,
+                           "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01");
+
+#undef EXPECT_DECODE_ULEB128_EQ
+}
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 8af219f34e18b6..cfce540a95d994 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1565,13 +1565,14 @@ bool MemoryAddressSpacePredicateMatcher::isIdentical(
 
 void MemoryAddressSpacePredicateMatcher::emitPredicateOpcodes(
     MatchTable &Table, RuleMatcher &Rule) const {
+  assert(AddrSpaces.size() < 255);
   Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
         << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
         << MatchTable::Comment("MMO")
         << MatchTable::ULEB128Value(MMOIdx)
         // Encode number of address spaces to expect.
         << MatchTable::Comment("NumAddrSpace")
-        << MatchTable::ULEB128Value(AddrSpaces.size());
+        << MatchTable::IntValue(1, AddrSpaces.size());
   for (unsigned AS : AddrSpaces)
     Table << MatchTable::Comment("AddrSpace") << MatchTable::ULEB128Value(AS);
 
@@ -1593,7 +1594,7 @@ void MemoryAlignmentPredicateMatcher::emitPredicateOpcodes(
   Table << MatchTable::Opcode("GIM_CheckMemoryAlignment")
         << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
         << MatchTable::Comment("MMO") << MatchTable::ULEB128Value(MMOIdx)
-        << MatchTable::Comment("MinAlign") << MatchTable::ULEB128Value(MinAlign)
+        << MatchTable::Comment("MinAlign") << MatchTable::IntValue(1, MinAlign)
         << MatchTable::LineBreak;
 }
 

>From cd7718998316015421d9a51f6e4d5f53836da882 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 30 Apr 2024 14:29:06 +0200
Subject: [PATCH 2/4] remove __restrict

---
 llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 1086b1e89f1340..38d23d25d5b775 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -721,8 +721,7 @@ class GIMatchTableExecutor {
   // - Unchecked execution, assumes no error.
   // - Fast common case handling (1 byte values).
   LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
-  fastDecodeULEB128(const uint8_t *__restrict MatchTable,
-                    uint64_t &CurrentIdx) {
+  fastDecodeULEB128(const uint8_t *MatchTable, uint64_t &CurrentIdx) {
     uint64_t Value = MatchTable[CurrentIdx] & 0x7f;
     if (LLVM_UNLIKELY(MatchTable[CurrentIdx++] >= 128)) {
       unsigned Shift = 7;

>From fb23dfc8cf85213477b49855f8c01e188f61b4a9 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 30 Apr 2024 14:57:37 +0200
Subject: [PATCH 3/4] address comments

---
 llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h  | 5 +++--
 .../TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp      | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 38d23d25d5b775..b2ac6977feef46 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -722,8 +722,9 @@ class GIMatchTableExecutor {
   // - Fast common case handling (1 byte values).
   LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
   fastDecodeULEB128(const uint8_t *MatchTable, uint64_t &CurrentIdx) {
-    uint64_t Value = MatchTable[CurrentIdx] & 0x7f;
-    if (LLVM_UNLIKELY(MatchTable[CurrentIdx++] >= 128)) {
+    uint64_t Value = MatchTable[CurrentIdx++];
+    if (LLVM_UNLIKELY(Value >= 128)) {
+      Value &= 0x7f;
       unsigned Shift = 7;
       do {
         uint64_t Slice = MatchTable[CurrentIdx] & 0x7f;
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index cfce540a95d994..af3c93f669d0ad 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1565,7 +1565,7 @@ bool MemoryAddressSpacePredicateMatcher::isIdentical(
 
 void MemoryAddressSpacePredicateMatcher::emitPredicateOpcodes(
     MatchTable &Table, RuleMatcher &Rule) const {
-  assert(AddrSpaces.size() < 255);
+  assert(AddrSpaces.size() < 256);
   Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
         << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
         << MatchTable::Comment("MMO")

>From 5013299d89b6d42950adfea0b27c869ab2e21e59 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 30 Apr 2024 15:38:01 +0200
Subject: [PATCH 4/4] add compiler macro

---
 .../llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h        | 3 ++-
 llvm/include/llvm/Support/Compiler.h                      | 8 ++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index b2ac6977feef46..72c63ecba529f5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -721,7 +721,8 @@ class GIMatchTableExecutor {
   // - Unchecked execution, assumes no error.
   // - Fast common case handling (1 byte values).
   LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
-  fastDecodeULEB128(const uint8_t *MatchTable, uint64_t &CurrentIdx) {
+  fastDecodeULEB128(const uint8_t *LLVM_ATTRIBUTE_RESTRICT MatchTable,
+                    uint64_t &CurrentIdx) {
     uint64_t Value = MatchTable[CurrentIdx++];
     if (LLVM_UNLIKELY(Value >= 128)) {
       Value &= 0x7f;
diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h
index 8c315d255bb772..6725ef51c86c01 100644
--- a/llvm/include/llvm/Support/Compiler.h
+++ b/llvm/include/llvm/Support/Compiler.h
@@ -278,6 +278,14 @@
 #define LLVM_ATTRIBUTE_RETURNS_NONNULL
 #endif
 
+/// LLVM_ATTRIBUTE_RESTRICT- Annotates a pointer to tell the compiler that
+/// it is not aliased in the current scope.
+#if defined(__clang__) || defined(__GNUC__) || defined(_MSC_VER)
+#define LLVM_ATTRIBUTE_RESTRICT __restrict
+#else
+#define LLVM_ATTRIBUTE_RESTRICT
+#endif
+
 /// \macro LLVM_ATTRIBUTE_RETURNS_NOALIAS Used to mark a function as returning a
 /// pointer that does not alias any other valid pointer.
 #ifdef __GNUC__



More information about the llvm-commits mailing list