[llvm] [Support] Add signed operations to DataExtractor (PR #147261)

Pavel Labath via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 7 02:27:17 PDT 2025


https://github.com/labath created https://github.com/llvm/llvm-project/pull/147261

This is motivated by the [SFrame
format](https://discourse.llvm.org/t/rfc-adding-sframe-support-to-llvm/86900), which contains several signed fields.

Having explicit signed operations makes the parsing code read better and avoids potential surprises if e.g. a "signed" uint8_t value is converted to a greater width.

>From 844bda2b70ce1d15ed5ccc920c997515b0a2425f Mon Sep 17 00:00:00 2001
From: Pavel Labath <pavel at labath.sk>
Date: Mon, 7 Jul 2025 11:19:53 +0200
Subject: [PATCH] [Support] Add signed operations to DataExtractor

This is motivated by the [SFrame
format](https://discourse.llvm.org/t/rfc-adding-sframe-support-to-llvm/86900),
which contains several signed fields.

Having explicit signed operations makes the parsing code read better and
avoids potential surprises if e.g. a "signed" uint8_t value is converted
to a greater width.
---
 llvm/include/llvm/Support/DataExtractor.h    | 48 ++++++++++++++++++++
 llvm/lib/Support/DataExtractor.cpp           |  8 ++--
 llvm/unittests/Support/DataExtractorTest.cpp | 15 ++++++
 3 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index 1f7e45d43ca7a..3792f53407dd9 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -407,6 +407,18 @@ class DataExtractor {
     getU8(C, Dst.data(), Count);
   }
 
+  /// Extract a int8_t value from \a *OffsetPtr. In case of an extraction error,
+  /// or if error is already set, zero is returned and the offset is left
+  /// unmodified.
+  int8_t getS8(uint64_t *OffsetPtr, Error *Err = nullptr) const {
+    return static_cast<int8_t>(getU8(OffsetPtr, Err));
+  }
+
+  /// Extract a int8_t value from \a *OffsetPtr. In case of an extraction error,
+  /// or if the cursor is already in an error state, zero is returned and the
+  /// offset is left unmodified.
+  int8_t getS8(Cursor &C) const { return static_cast<int8_t>(getU8(C)); }
+
   //------------------------------------------------------------------
   /// Extract a uint16_t value from \a *offset_ptr.
   ///
@@ -462,6 +474,18 @@ class DataExtractor {
   LLVM_ABI uint16_t *getU16(uint64_t *offset_ptr, uint16_t *dst,
                             uint32_t count) const;
 
+  /// Extract a int16_t value from \a *OffsetPtr. In case of an extraction
+  /// error, or if error is already set, zero is returned and the offset is left
+  /// unmodified.
+  int16_t getS16(uint64_t *OffsetPtr, Error *Err = nullptr) const {
+    return static_cast<int16_t>(getU16(OffsetPtr, Err));
+  }
+
+  /// Extract a int16_t value from \a *OffsetPtr. In case of an extraction
+  /// error, or if the cursor is already in an error state, zero is returned and
+  /// the offset is left unmodified.
+  int16_t getS16(Cursor &C) const { return static_cast<int16_t>(getU16(C)); }
+
   /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
   /// in a uint32_t.
   ///
@@ -543,6 +567,18 @@ class DataExtractor {
   LLVM_ABI uint32_t *getU32(uint64_t *offset_ptr, uint32_t *dst,
                             uint32_t count) const;
 
+  /// Extract a int32_t value from \a *OffsetPtr. In case of an extraction
+  /// error, or if error is already set, zero is returned and the offset is left
+  /// unmodified.
+  int32_t getS32(uint64_t *OffsetPtr, Error *Err = nullptr) const {
+    return static_cast<int32_t>(getU32(OffsetPtr, Err));
+  }
+
+  /// Extract a int32_t value from \a *OffsetPtr. In case of an extraction
+  /// error, or if the cursor is already in an error state, zero is returned and
+  /// the offset is left unmodified.
+  int32_t getS32(Cursor &C) const { return static_cast<int32_t>(getU32(C)); }
+
   /// Extract a uint64_t value from \a *offset_ptr.
   ///
   /// Extract a single uint64_t from the binary data at the offset
@@ -596,6 +632,18 @@ class DataExtractor {
   LLVM_ABI uint64_t *getU64(uint64_t *offset_ptr, uint64_t *dst,
                             uint32_t count) const;
 
+  /// Extract a int64_t value from \a *OffsetPtr. In case of an extraction
+  /// error, or if error is already set, zero is returned and the offset is left
+  /// unmodified.
+  int64_t getS64(uint64_t *OffsetPtr, Error *Err = nullptr) const {
+    return static_cast<int64_t>(getU64(OffsetPtr, Err));
+  }
+
+  /// Extract a int64_t value from \a *OffsetPtr. In case of an extraction
+  /// error, or if the cursor is already in an error state, zero is returned and
+  /// the offset is left unmodified.
+  int64_t getS64(Cursor &C) const { return static_cast<int64_t>(getU64(C)); }
+
   /// Extract a signed LEB128 value from \a *offset_ptr.
   ///
   /// Extracts an signed LEB128 number from this object's data
diff --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
index 3da9511bf2669..7390093af08a1 100644
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -142,13 +142,13 @@ int64_t
 DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
   switch (byte_size) {
   case 1:
-    return (int8_t)getU8(offset_ptr);
+    return getS8(offset_ptr);
   case 2:
-    return (int16_t)getU16(offset_ptr);
+    return getS16(offset_ptr);
   case 4:
-    return (int32_t)getU32(offset_ptr);
+    return getS32(offset_ptr);
   case 8:
-    return (int64_t)getU64(offset_ptr);
+    return getS64(offset_ptr);
   }
   llvm_unreachable("getSigned unhandled case!");
 }
diff --git a/llvm/unittests/Support/DataExtractorTest.cpp b/llvm/unittests/Support/DataExtractorTest.cpp
index e019cf6fc9256..a6e2c94c2f15b 100644
--- a/llvm/unittests/Support/DataExtractorTest.cpp
+++ b/llvm/unittests/Support/DataExtractorTest.cpp
@@ -78,15 +78,30 @@ TEST(DataExtractorTest, SignedNumbers) {
 
   EXPECT_EQ(-128, DE.getSigned(&offset, 1));
   EXPECT_EQ(1U, offset);
+  offset = 0;
+  EXPECT_EQ(-128, DE.getS8(&offset));
+  EXPECT_EQ(1U, offset);
+
   offset = 0;
   EXPECT_EQ(-32624, DE.getSigned(&offset, 2));
   EXPECT_EQ(2U, offset);
+  offset = 0;
+  EXPECT_EQ(-32624, DE.getS16(&offset));
+  EXPECT_EQ(2U, offset);
+
   offset = 0;
   EXPECT_EQ(-2137980929, DE.getSigned(&offset, 4));
   EXPECT_EQ(4U, offset);
+  offset = 0;
+  EXPECT_EQ(-2137980929, DE.getS32(&offset));
+  EXPECT_EQ(4U, offset);
+
   offset = 0;
   EXPECT_EQ(-9182558167379214336LL, DE.getSigned(&offset, 8));
   EXPECT_EQ(8U, offset);
+  offset = 0;
+  EXPECT_EQ(-9182558167379214336LL, DE.getS64(&offset));
+  EXPECT_EQ(8U, offset);
 }
 
 TEST(DataExtractorTest, Strings) {



More information about the llvm-commits mailing list