[llvm] [Support] Add decodeULEB128AndInc/decodeSLEB128AndInc (PR #85739)

Mon Mar 18 23:18:53 PDT 2024

llvmbot wrote:



@llvm/pr-subscribers-llvm-support

@llvm/pr-subscribers-objectyaml

Author: Fangrui Song (MaskRay)

<details>
<summary>Changes</summary>

Many decodeULEB128/decodeSLEB128 callers need to advance the pointer past
the bytes that were just decoded. Add decodeULEB128AndInc/decodeSLEB128AndInc
helpers to simplify this common pattern. The helpers do not take `end` and
`error` parameters at present because many callers don't need them.


---
Full diff: https://github.com/llvm/llvm-project/pull/85739.diff


4 Files Affected:

- (modified) llvm/include/llvm/Support/LEB128.h (+14) 
- (modified) llvm/tools/obj2yaml/macho2yaml.cpp (+3-7) 
- (modified) llvm/unittests/Support/LEB128Test.cpp (+15) 
- (modified) llvm/utils/TableGen/DecoderEmitter.cpp (+10-23) 


``````````diff

diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h
index 7fc572b6ff06ef..c4e741549f3ff1 100644
--- a/llvm/include/llvm/Support/LEB128.h
+++ b/llvm/include/llvm/Support/LEB128.h
@@ -200,6 +200,20 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr,
   return Value;
 }
 
+inline uint64_t decodeULEB128AndInc(const uint8_t *&p) {
+  unsigned n;
+  auto ret = decodeULEB128(p, &n);
+  p += n;
+  return ret;
+}
+
+inline int64_t decodeSLEB128AndInc(const uint8_t *&p) {
+  unsigned n;
+  auto ret = decodeSLEB128(p, &n);
+  p += n;
+  return ret;
+}
+
 /// Utility function to get the size of the ULEB128-encoded value.
 extern unsigned getULEB128Size(uint64_t Value);
 
diff --git a/llvm/tools/obj2yaml/macho2yaml.cpp b/llvm/tools/obj2yaml/macho2yaml.cpp
index cdd871e8c1d684..d4a8c092a083f4 100644
--- a/llvm/tools/obj2yaml/macho2yaml.cpp
+++ b/llvm/tools/obj2yaml/macho2yaml.cpp
@@ -427,15 +427,13 @@ void MachODumper::dumpBindOpcodes(
         static_cast<MachO::BindOpcode>(*OpCode & MachO::BIND_OPCODE_MASK);
     BindOp.Imm = *OpCode & MachO::BIND_IMMEDIATE_MASK;
 
-    unsigned Count;
     uint64_t ULEB = 0;
     int64_t SLEB = 0;
 
     switch (BindOp.Opcode) {
     case MachO::BIND_OPCODE_DO_BIND_ULEB_TIMES_SKIPPING_ULEB:
-      ULEB = decodeULEB128(OpCode + 1, &Count);
+      ULEB = decodeULEB128AndInc(++OpCode);
       BindOp.ULEBExtraData.push_back(ULEB);
-      OpCode += Count;
       [[fallthrough]];
     // Intentionally no break here -- this opcode has two ULEB values
 
@@ -443,15 +441,13 @@ void MachODumper::dumpBindOpcodes(
     case MachO::BIND_OPCODE_SET_SEGMENT_AND_OFFSET_ULEB:
     case MachO::BIND_OPCODE_ADD_ADDR_ULEB:
     case MachO::BIND_OPCODE_DO_BIND_ADD_ADDR_ULEB:
-      ULEB = decodeULEB128(OpCode + 1, &Count);
+      ULEB = decodeULEB128AndInc(++OpCode);
       BindOp.ULEBExtraData.push_back(ULEB);
-      OpCode += Count;
       break;
 
     case MachO::BIND_OPCODE_SET_ADDEND_SLEB:
-      SLEB = decodeSLEB128(OpCode + 1, &Count);
+      SLEB = decodeSLEB128AndInc(++OpCode);
       BindOp.SLEBExtraData.push_back(SLEB);
-      OpCode += Count;
       break;
 
     case MachO::BIND_OPCODE_SET_SYMBOL_TRAILING_FLAGS_IMM:
diff --git a/llvm/unittests/Support/LEB128Test.cpp b/llvm/unittests/Support/LEB128Test.cpp
index 21523e5f7a08c7..08b8c5573ce637 100644
--- a/llvm/unittests/Support/LEB128Test.cpp
+++ b/llvm/unittests/Support/LEB128Test.cpp
@@ -242,6 +242,21 @@ TEST(LEB128Test, DecodeInvalidSLEB128) {
 #undef EXPECT_INVALID_SLEB128
 }
 
+TEST(LEB128Test, DecodeAndInc) {
+#define EXPECT_LEB128(FUN, VALUE, SIZE)                                        \
+  do {                                                                         \
+    const uint8_t *V = reinterpret_cast<const uint8_t *>(VALUE), *P = V;       \
+    auto Expected = FUN(P), Actual = FUN##AndInc(P);                           \
+    EXPECT_EQ(Actual, Expected);                                               \
+    EXPECT_EQ(P - V, SIZE);                                                    \
+  } while (0)
+  EXPECT_LEB128(decodeULEB128, "\x7f", 1);
+  EXPECT_LEB128(decodeULEB128, "\x80\x01", 2);
+  EXPECT_LEB128(decodeSLEB128, "\x7f", 1);
+  EXPECT_LEB128(decodeSLEB128, "\x80\x01", 2);
+#undef EXPECT_LEB128
+}
+
 TEST(LEB128Test, SLEB128Size) {
   // Positive Value Testing Plan:
   // (1) 128 ^ n - 1 ........ need (n+1) bytes
diff --git a/llvm/utils/TableGen/DecoderEmitter.cpp b/llvm/utils/TableGen/DecoderEmitter.cpp
index 628bff520a12e9..732f34ed04c577 100644
--- a/llvm/utils/TableGen/DecoderEmitter.cpp
+++ b/llvm/utils/TableGen/DecoderEmitter.cpp
@@ -2283,10 +2283,8 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
     }
     case MCD::OPC_CheckField: {
       // Decode the start value.
-      unsigned Len;
-      unsigned Start = decodeULEB128(++Ptr, &Len);
-      Ptr += Len;
-      Len = *Ptr;)";
+      unsigned Start = decodeULEB128AndInc(++Ptr);
+      unsigned Len = *Ptr;)";
   if (IsVarLenInst)
     OS << "\n      makeUp(insn, Start + Len);";
   OS << R"(
@@ -2311,10 +2309,8 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       break;
     }
     case MCD::OPC_CheckPredicate: {
-      unsigned Len;
       // Decode the Predicate Index value.
-      unsigned PIdx = decodeULEB128(++Ptr, &Len);
-      Ptr += Len;
+      unsigned PIdx = decodeULEB128AndInc(++Ptr);
       // NumToSkip is a plain 24-bit integer.
       unsigned NumToSkip = *Ptr++;
       NumToSkip |= (*Ptr++) << 8;
@@ -2330,18 +2326,15 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       break;
     }
     case MCD::OPC_Decode: {
-      unsigned Len;
       // Decode the Opcode value.
-      unsigned Opc = decodeULEB128(++Ptr, &Len);
-      Ptr += Len;
-      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);
-      Ptr += Len;
+      unsigned Opc = decodeULEB128AndInc(++Ptr);
+      unsigned DecodeIdx = decodeULEB128AndInc(Ptr);
 
       MI.clear();
       MI.setOpcode(Opc);
       bool DecodeComplete;)";
   if (IsVarLenInst) {
-    OS << "\n      Len = InstrLenTable[Opc];\n"
+    OS << "\n      unsigned Len = InstrLenTable[Opc];\n"
        << "      makeUp(insn, Len);";
   }
   OS << R"(
@@ -2354,12 +2347,9 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
       return S;
     }
     case MCD::OPC_TryDecode: {
-      unsigned Len;
       // Decode the Opcode value.
-      unsigned Opc = decodeULEB128(++Ptr, &Len);
-      Ptr += Len;
-      unsigned DecodeIdx = decodeULEB128(Ptr, &Len);
-      Ptr += Len;
+      unsigned Opc = decodeULEB128AndInc(++Ptr);
+      unsigned DecodeIdx = decodeULEB128AndInc(Ptr);
       // NumToSkip is a plain 24-bit integer.
       unsigned NumToSkip = *Ptr++;
       NumToSkip |= (*Ptr++) << 8;
@@ -2391,11 +2381,8 @@ static DecodeStatus decodeInstruction(const uint8_t DecodeTable[], MCInst &MI,
     }
     case MCD::OPC_SoftFail: {
       // Decode the mask values.
-      unsigned Len;
-      uint64_t PositiveMask = decodeULEB128(++Ptr, &Len);
-      Ptr += Len;
-      uint64_t NegativeMask = decodeULEB128(Ptr, &Len);
-      Ptr += Len;
+      uint64_t PositiveMask = decodeULEB128AndInc(++Ptr);
+      uint64_t NegativeMask = decodeULEB128AndInc(Ptr);
       bool Fail = (insn & PositiveMask) != 0 || (~insn & NegativeMask) != 0;
       if (Fail)
         S = MCDisassembler::SoftFail;

``````````

</details>


https://github.com/llvm/llvm-project/pull/85739