[llvm] [GlobalISel] Optimize ULEB128 usage (PR #90565)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 30 05:29:19 PDT 2024


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/90565

>From 03bc09d492da23142a197d999d200fd58161b102 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 30 Apr 2024 08:06:15 +0200
Subject: [PATCH 1/2] [GlobalISel] Optimize ULEB128 usage

- Remove some cases where ULEB128 isn't needed
- Add a fastDecodeULEB128 tailored for GlobalISel which does unchecked decoding optimized for the common case, which is 1 byte values.

Previous LEB128 decode function was about 45 instructions on X86 assuming it got inlined for our use case & the error checks got optimized out. This "fast" one is 23 instruction.
---
 .../CodeGen/GlobalISel/GIMatchTableExecutor.h | 25 +++++++++-
 .../GlobalISel/GIMatchTableExecutorImpl.h     | 15 +++---
 .../CodeGen/GlobalISel/CMakeLists.txt         |  1 +
 .../GlobalISel/GIMatchTableExecutorTest.cpp   | 49 +++++++++++++++++++
 .../GlobalISel/GlobalISelMatchTable.cpp       |  5 +-
 5 files changed, 82 insertions(+), 13 deletions(-)
 create mode 100644 llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 8eddc6a6a531b4..1086b1e89f1340 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -168,7 +168,7 @@ enum {
   /// operand.
   /// - InsnID(ULEB128) - Instruction ID
   /// - MMOIdx(ULEB128) - MMO index
-  /// - NumAddrSpace(ULEB128) - Number of valid address spaces
+  /// - NumAddrSpace(1) - Number of valid address spaces
   /// - AddrSpaceN(ULEB128) - An allowed space of the memory access
   /// - AddrSpaceN+1 ...
   GIM_CheckMemoryAddressSpace,
@@ -177,7 +177,7 @@ enum {
   /// memory operand.
   /// - InsnID(ULEB128) - Instruction ID
   /// - MMOIdx(ULEB128) - MMO index
-  /// - MinAlign(ULEB128) - Minimum acceptable alignment
+  /// - MinAlign(1) - Minimum acceptable alignment
   GIM_CheckMemoryAlignment,
 
   /// Check the size of the memory access for the given machine memory operand
@@ -713,6 +713,27 @@ class GIMatchTableExecutor {
     memcpy(&Ret, MatchTable, sizeof(Ret));
     return Ret;
   }
+
+public:
+  // Faster ULEB128 decoder tailored for the Match Table Executor.
+  //
+  // - Arguments are fixed to avoid mid-function checks.
+  // - Unchecked execution, assumes no error.
+  // - Fast common case handling (1 byte values).
+  LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
+  fastDecodeULEB128(const uint8_t *__restrict MatchTable,
+                    uint64_t &CurrentIdx) {
+    uint64_t Value = MatchTable[CurrentIdx] & 0x7f;
+    if (LLVM_UNLIKELY(MatchTable[CurrentIdx++] >= 128)) {
+      unsigned Shift = 7;
+      do {
+        uint64_t Slice = MatchTable[CurrentIdx] & 0x7f;
+        Value += Slice << Shift;
+        Shift += 7;
+      } while (MatchTable[CurrentIdx++] >= 128);
+    }
+    return Value;
+  }
 };
 
 } // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
index dec2d97bb1fa7e..4d147bf20c26a5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h
@@ -100,10 +100,7 @@ bool GIMatchTableExecutor::executeMatchTable(
   };
 
   const auto readULEB = [&]() {
-    unsigned N = 0;
-    uint64_t Val = decodeULEB128(MatchTable + CurrentIdx, &N);
-    CurrentIdx += N;
-    return Val;
+    return fastDecodeULEB128(MatchTable, CurrentIdx);
   };
 
   // Convenience function to return a signed value. This avoids
@@ -476,7 +473,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     }
     case GIM_CheckAtomicOrdering: {
       uint64_t InsnID = readULEB();
-      auto Ordering = (AtomicOrdering)readULEB();
+      auto Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtExecutor::getName(),
                       dbgs() << CurrentIdx << ": GIM_CheckAtomicOrdering(MIs["
                              << InsnID << "], " << (uint64_t)Ordering << ")\n");
@@ -493,7 +490,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     }
     case GIM_CheckAtomicOrderingOrStrongerThan: {
       uint64_t InsnID = readULEB();
-      auto Ordering = (AtomicOrdering)readULEB();
+      auto Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtExecutor::getName(),
                       dbgs() << CurrentIdx
                              << ": GIM_CheckAtomicOrderingOrStrongerThan(MIs["
@@ -511,7 +508,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     }
     case GIM_CheckAtomicOrderingWeakerThan: {
       uint64_t InsnID = readULEB();
-      auto Ordering = (AtomicOrdering)readULEB();
+      auto Ordering = (AtomicOrdering)MatchTable[CurrentIdx++];
       DEBUG_WITH_TYPE(TgtExecutor::getName(),
                       dbgs() << CurrentIdx
                              << ": GIM_CheckAtomicOrderingWeakerThan(MIs["
@@ -531,7 +528,7 @@ bool GIMatchTableExecutor::executeMatchTable(
       uint64_t InsnID = readULEB();
       uint64_t MMOIdx = readULEB();
       // This accepts a list of possible address spaces.
-      const uint64_t NumAddrSpace = readULEB();
+      const uint64_t NumAddrSpace = MatchTable[CurrentIdx++];
 
       if (State.MIs[InsnID]->getNumMemOperands() <= MMOIdx) {
         if (handleReject() == RejectAndGiveUp)
@@ -568,7 +565,7 @@ bool GIMatchTableExecutor::executeMatchTable(
     case GIM_CheckMemoryAlignment: {
       uint64_t InsnID = readULEB();
       uint64_t MMOIdx = readULEB();
-      uint64_t MinAlign = readULEB();
+      uint64_t MinAlign = MatchTable[CurrentIdx++];
 
       assert(State.MIs[InsnID] != nullptr && "Used insn before defined");
 
diff --git a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
index 6ed2409f2ad755..111d7f4d2f6200 100644
--- a/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
+++ b/llvm/unittests/CodeGen/GlobalISel/CMakeLists.txt
@@ -15,6 +15,7 @@ set(LLVM_LINK_COMPONENTS
 add_llvm_unittest(GlobalISelTests
   ConstantFoldingTest.cpp
   CSETest.cpp
+  GIMatchTableExecutorTest.cpp
   LegalizerTest.cpp
   LegalizerHelperTest.cpp
   LegalizerInfoTest.cpp
diff --git a/llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp b/llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp
new file mode 100644
index 00000000000000..5a811d79592880
--- /dev/null
+++ b/llvm/unittests/CodeGen/GlobalISel/GIMatchTableExecutorTest.cpp
@@ -0,0 +1,49 @@
+//===- GIMatchTableExecutorTest.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+TEST(GlobalISelLEB128Test, fastDecodeULEB128) {
+#define EXPECT_DECODE_ULEB128_EQ(EXPECTED, VALUE)                              \
+  do {                                                                         \
+    uint64_t ActualSize = 0;                                                   \
+    uint64_t Actual = GIMatchTableExecutor::fastDecodeULEB128(                 \
+        reinterpret_cast<const uint8_t *>(VALUE), ActualSize);                 \
+    EXPECT_EQ(sizeof(VALUE) - 1, ActualSize);                                  \
+    EXPECT_EQ(EXPECTED, Actual);                                               \
+  } while (0)
+
+  EXPECT_DECODE_ULEB128_EQ(0u, "\x00");
+  EXPECT_DECODE_ULEB128_EQ(1u, "\x01");
+  EXPECT_DECODE_ULEB128_EQ(63u, "\x3f");
+  EXPECT_DECODE_ULEB128_EQ(64u, "\x40");
+  EXPECT_DECODE_ULEB128_EQ(0x7fu, "\x7f");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x01");
+  EXPECT_DECODE_ULEB128_EQ(0x81u, "\x81\x01");
+  EXPECT_DECODE_ULEB128_EQ(0x90u, "\x90\x01");
+  EXPECT_DECODE_ULEB128_EQ(0xffu, "\xff\x01");
+  EXPECT_DECODE_ULEB128_EQ(0x100u, "\x80\x02");
+  EXPECT_DECODE_ULEB128_EQ(0x101u, "\x81\x02");
+  EXPECT_DECODE_ULEB128_EQ(4294975616ULL, "\x80\xc1\x80\x80\x10");
+
+  // Decode ULEB128 with extra padding bytes
+  EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0u, "\x80\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x7fu, "\xff\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80u, "\x80\x81\x80\x80\x80\x80\x80\x80\x80\x00");
+  EXPECT_DECODE_ULEB128_EQ(0x80000000'00000000ul,
+                           "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01");
+
+#undef EXPECT_DECODE_ULEB128_EQ
+}
diff --git a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
index 8af219f34e18b6..cfce540a95d994 100644
--- a/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
+++ b/llvm/utils/TableGen/Common/GlobalISel/GlobalISelMatchTable.cpp
@@ -1565,13 +1565,14 @@ bool MemoryAddressSpacePredicateMatcher::isIdentical(
 
 void MemoryAddressSpacePredicateMatcher::emitPredicateOpcodes(
     MatchTable &Table, RuleMatcher &Rule) const {
+  assert(AddrSpaces.size() < 255);
   Table << MatchTable::Opcode("GIM_CheckMemoryAddressSpace")
         << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
         << MatchTable::Comment("MMO")
         << MatchTable::ULEB128Value(MMOIdx)
         // Encode number of address spaces to expect.
         << MatchTable::Comment("NumAddrSpace")
-        << MatchTable::ULEB128Value(AddrSpaces.size());
+        << MatchTable::IntValue(1, AddrSpaces.size());
   for (unsigned AS : AddrSpaces)
     Table << MatchTable::Comment("AddrSpace") << MatchTable::ULEB128Value(AS);
 
@@ -1593,7 +1594,7 @@ void MemoryAlignmentPredicateMatcher::emitPredicateOpcodes(
   Table << MatchTable::Opcode("GIM_CheckMemoryAlignment")
         << MatchTable::Comment("MI") << MatchTable::ULEB128Value(InsnVarID)
         << MatchTable::Comment("MMO") << MatchTable::ULEB128Value(MMOIdx)
-        << MatchTable::Comment("MinAlign") << MatchTable::ULEB128Value(MinAlign)
+        << MatchTable::Comment("MinAlign") << MatchTable::IntValue(1, MinAlign)
         << MatchTable::LineBreak;
 }
 

>From cd7718998316015421d9a51f6e4d5f53836da882 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 30 Apr 2024 14:29:06 +0200
Subject: [PATCH 2/2] remove __restrict

---
 llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
index 1086b1e89f1340..38d23d25d5b775 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h
@@ -721,8 +721,7 @@ class GIMatchTableExecutor {
   // - Unchecked execution, assumes no error.
   // - Fast common case handling (1 byte values).
   LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
-  fastDecodeULEB128(const uint8_t *__restrict MatchTable,
-                    uint64_t &CurrentIdx) {
+  fastDecodeULEB128(const uint8_t *MatchTable, uint64_t &CurrentIdx) {
     uint64_t Value = MatchTable[CurrentIdx] & 0x7f;
     if (LLVM_UNLIKELY(MatchTable[CurrentIdx++] >= 128)) {
       unsigned Shift = 7;



More information about the llvm-commits mailing list