[llvm] df8dda6 - Add methods to data extractor for extracting bytes and fixed length C strings.

Greg Clayton via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 14:17:51 PST 2020


Author: Greg Clayton
Date: 2020-02-24T14:17:43-08:00
New Revision: df8dda67ed03f7d7ba3d9475556ab01946386852

URL: https://github.com/llvm/llvm-project/commit/df8dda67ed03f7d7ba3d9475556ab01946386852
DIFF: https://github.com/llvm/llvm-project/commit/df8dda67ed03f7d7ba3d9475556ab01946386852.diff

LOG: Add methods to data extractor for extracting bytes and fixed length C strings.

Summary:
These modifications will be used in D74883.

Fixed length C strings can have trailing NULLs or sometimes spaces (BSD archive files), so extraction strips trailing NULLs by default, and callers can pass a different set of characters (for example spaces and tabs) to strip instead. This is used to extract fixed length C strings from ELF NOTEs in D74883.

Reviewers: labath, dblaikie, aprantl

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D74991

Added: 
    

Modified: 
    llvm/include/llvm/Support/DataExtractor.h
    llvm/lib/Support/DataExtractor.cpp
    llvm/unittests/Support/DataExtractorTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Support/DataExtractor.h b/llvm/include/llvm/Support/DataExtractor.h
index 0be478811b22..164e5e8dcdd8 100644
--- a/llvm/include/llvm/Support/DataExtractor.h
+++ b/llvm/include/llvm/Support/DataExtractor.h
@@ -141,6 +141,62 @@ class DataExtractor {
   ///     a default-initialized StringRef will be returned.
   StringRef getCStrRef(uint64_t *offset_ptr) const;
 
+  /// Extract a fixed length string from \a *OffsetPtr and consume \a Length
+  /// bytes.
+  ///
+  /// Returns a StringRef for the string from the data at the offset
+  /// pointed to by \a OffsetPtr. A fixed length C string will be extracted
+  /// and the \a OffsetPtr will be advanced by \a Length bytes.
+  ///
+  /// \param[in,out] OffsetPtr
+  ///     A pointer to an offset within the data that will be advanced
+  ///     by the appropriate number of bytes if the value is extracted
+  ///     correctly. If the offset is out of bounds or there are not
+  ///     enough bytes to extract this value, the offset will be left
+  ///     unmodified.
+  ///
+  /// \param[in] Length
+  ///     The length of the fixed length string to extract. If there are not
+  ///     enough bytes in the data to extract the full string, the offset will
+  ///     be left unmodified.
+  ///
+  /// \param[in] TrimChars
+  ///     A set of characters to trim from the end of the string. Fixed length
+  ///     strings are commonly padded with one or more zero bytes, and some
+  ///     formats pad with spaces instead; the default trims only NUL bytes.
+  ///     NOTE(review): confirm that only the end of the string is trimmed.
+  ///
+  /// \return
+  ///     A StringRef for the C string value in the data. If the offset
+  ///     pointed to by \a OffsetPtr is out of bounds, or if the
+  ///     offset plus the length of the C string is out of bounds,
+  ///     a default-initialized StringRef will be returned.
+  StringRef getFixedLengthString(uint64_t *OffsetPtr,
+      uint64_t Length, StringRef TrimChars = {"\0", 1}) const;
+
+  /// Extract a fixed number of bytes from the specified offset.
+  ///
+  /// Returns a StringRef for the bytes from the data at the offset
+  /// pointed to by \a OffsetPtr. No trimming or NUL-termination handling is
+  /// applied, and on success \a OffsetPtr is advanced by \a Length bytes.
+  ///
+  /// \param[in,out] OffsetPtr
+  ///     A pointer to an offset within the data that will be advanced
+  ///     by the appropriate number of bytes if the value is extracted
+  ///     correctly. If the offset is out of bounds or there are not
+  ///     enough bytes to extract this value, the offset will be left
+  ///     unmodified.
+  ///
+  /// \param[in] Length
+  ///     The number of bytes to extract. If there are not enough bytes in the
+  ///     data to extract all of the bytes, the offset will be left unmodified.
+  ///
+  /// \return
+  ///     A StringRef for the extracted bytes. If the offset pointed to by
+  ///     \a OffsetPtr is out of bounds, or if the offset plus the length
+  ///     is out of bounds, a default-initialized StringRef will be returned.
+  StringRef getBytes(uint64_t *OffsetPtr, uint64_t Length) const;
+
   /// Extract an unsigned integer of size \a byte_size from \a
   /// *offset_ptr.
   ///

diff  --git a/llvm/lib/Support/DataExtractor.cpp b/llvm/lib/Support/DataExtractor.cpp
index a98297cdb35f..3d19b4d481bb 100644
--- a/llvm/lib/Support/DataExtractor.cpp
+++ b/llvm/lib/Support/DataExtractor.cpp
@@ -171,6 +171,21 @@ StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const {
   return StringRef();
 }
 
+StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
+                                              uint64_t Length,
+                                              StringRef TrimChars) const {
+  // rtrim, not trim: only trailing padding is stripped, as documented.
+  return getBytes(OffsetPtr, Length).rtrim(TrimChars);
+}
+
+StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length) const {
+  const uint64_t Start = *OffsetPtr; // Only written back on success.
+  if (!isValidOffsetForDataOfSize(Start, Length))
+    return StringRef();
+  *OffsetPtr = Start + Length;
+  return Data.substr(Start, Length);
+}
+
 uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr,
                                    llvm::Error *Err) const {
   assert(*offset_ptr <= Data.size());

diff  --git a/llvm/unittests/Support/DataExtractorTest.cpp b/llvm/unittests/Support/DataExtractorTest.cpp
index cdb8bfdcb8b7..35b25dd389bf 100644
--- a/llvm/unittests/Support/DataExtractorTest.cpp
+++ b/llvm/unittests/Support/DataExtractorTest.cpp
@@ -278,4 +278,51 @@ TEST(DataExtractorTest, size) {
   DataExtractor DE2(ArrayRef<uint8_t>(Data), false, 8);
   EXPECT_EQ(DE2.size(), sizeof(Data));
 }
+
+TEST(DataExtractorTest, FixedLengthString) {
+  const char Buffer[] = "hello\x00\x00\x00world  \thola\x00";
+  const StringRef Payload(Buffer, sizeof(Buffer)-1);
+  DataExtractor Extractor(Payload, false, 8);
+  uint64_t Cursor = 0;
+  // Over-long requests return an empty ref and leave Cursor untouched.
+  StringRef Field = Extractor.getFixedLengthString(&Cursor, sizeof(Buffer));
+  EXPECT_TRUE(Field.empty());
+  EXPECT_EQ(Cursor, 0u);
+
+  // An 8 byte field padded with NULs: the default trim set drops the padding.
+  Field = Extractor.getFixedLengthString(&Cursor, 8);
+  EXPECT_EQ(Cursor, 8u);
+  EXPECT_EQ(Field.size(), 5u);
+  EXPECT_EQ(Field, "hello");
+
+  // An 8 byte field padded with spaces and a tab, trimmed via TrimChars.
+  Field = Extractor.getFixedLengthString(&Cursor, 8, " \t");
+  EXPECT_EQ(Cursor, 16u);
+  EXPECT_EQ(Field.size(), 5u);
+  EXPECT_EQ(Field, "world");
+  // The remaining bytes form an ordinary NUL terminated C string.
+  Field = Extractor.getCStrRef(&Cursor);
+  EXPECT_EQ(Field.size(), 4u);
+  EXPECT_EQ(Field, "hola");
+}
+
+
+TEST(DataExtractorTest, GetBytes) {
+  // The third byte is NUL to show that extraction is not cut short by it.
+  const char Raw[] = "\x01\x02\x00\x04";
+  const StringRef Expected(Raw, sizeof(Raw)-1);
+  DataExtractor Extractor(Expected, false, 8);
+  uint64_t Cursor = 0;
+
+  // One byte more than is available: empty result, Cursor stays at zero.
+  StringRef Chunk = Extractor.getBytes(&Cursor, sizeof(Raw));
+  EXPECT_TRUE(Chunk.empty());
+  EXPECT_EQ(Cursor, 0u);
+  // Exactly the four available bytes: the whole buffer comes back.
+  Chunk = Extractor.getBytes(&Cursor, 4);
+  EXPECT_EQ(Cursor, 4u);
+  EXPECT_EQ(Chunk.size(), 4u);
+  EXPECT_EQ(Chunk, Expected);
+}
+
 }


        


More information about the llvm-commits mailing list