[lld] 330268b - [Support/Hash functions] Change the `final()` and `result()` of the hashing functions to return an array of bytes

Argyrios Kyrtzidis via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 5 21:38:13 PDT 2022


Author: Argyrios Kyrtzidis
Date: 2022-04-05T21:38:06-07:00
New Revision: 330268ba346b679af786879d8f696c8c412a40eb

URL: https://github.com/llvm/llvm-project/commit/330268ba346b679af786879d8f696c8c412a40eb
DIFF: https://github.com/llvm/llvm-project/commit/330268ba346b679af786879d8f696c8c412a40eb.diff

LOG: [Support/Hash functions] Change the `final()` and `result()` of the hashing functions to return an array of bytes

Returning `std::array<uint8_t, N>` is better ergonomics for the hashing functions usage, instead of a `StringRef`:

* When returning `StringRef`, client code is "jumping through hoops" to do string manipulations instead of dealing with fixed array of bytes directly, which is more natural
* Returning `std::array<uint8_t, N>` avoids the need for the hasher classes to keep a field just for the purpose of wrapping it and returning it as a `StringRef`

As part of this patch also:

* Introduce `TruncatedBLAKE3` which is useful for using BLAKE3 as the hasher type for `HashBuilder` with non-default hash sizes.
* Make `MD5Result` inherit from `std::array<uint8_t, 16>` which improves & simplifies its API.

Differential Revision: https://reviews.llvm.org/D123100

Added: 
    

Modified: 
    bolt/lib/Core/DebugData.cpp
    clang/include/clang/Basic/Module.h
    clang/lib/Serialization/ASTWriter.cpp
    lld/MachO/SyntheticSections.cpp
    llvm/include/llvm/Support/BLAKE3.h
    llvm/include/llvm/Support/HashBuilder.h
    llvm/include/llvm/Support/MD5.h
    llvm/include/llvm/Support/SHA1.h
    llvm/include/llvm/Support/SHA256.h
    llvm/include/llvm/Support/raw_sha1_ostream.h
    llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
    llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
    llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
    llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
    llvm/lib/MC/MCDwarf.cpp
    llvm/lib/MC/MCParser/AsmParser.cpp
    llvm/lib/MC/MCParser/MasmParser.cpp
    llvm/lib/ObjCopy/MachO/MachOWriter.cpp
    llvm/lib/Support/BLAKE3/README.md
    llvm/lib/Support/MD5.cpp
    llvm/lib/Support/SHA1.cpp
    llvm/lib/Support/SHA256.cpp
    llvm/unittests/Support/BLAKE3Test.cpp
    llvm/unittests/Support/HashBuilderTest.cpp
    llvm/unittests/Support/MD5Test.cpp
    llvm/unittests/Support/SHA256.cpp
    llvm/unittests/Support/raw_sha1_ostream_test.cpp
    mlir/lib/Pass/IRPrinting.cpp

Removed: 
    


################################################################################
diff --git a/bolt/lib/Core/DebugData.cpp b/bolt/lib/Core/DebugData.cpp
index ba6a653cb88be..2ebd2c03838dd 100644
--- a/bolt/lib/Core/DebugData.cpp
+++ b/bolt/lib/Core/DebugData.cpp
@@ -820,7 +820,8 @@ void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
   auto hashAndAddAbbrev = [&](StringRef AbbrevData) -> bool {
     llvm::SHA1 Hasher;
     Hasher.update(AbbrevData);
-    StringRef Key = Hasher.final();
+    std::array<uint8_t, 20> Hash = Hasher.final();
+    StringRef Key((const char *)Hash.data(), Hash.size());
     auto Iter = AbbrevDataCache.find(Key);
     if (Iter != AbbrevDataCache.end()) {
       UnitsAbbrevData[&Unit] = Iter->second.get();

diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h
index af96038ba4aa8..45c23d5b7988e 100644
--- a/clang/include/clang/Basic/Module.h
+++ b/clang/include/clang/Basic/Module.h
@@ -71,8 +71,8 @@ struct ASTFileSignature : std::array<uint8_t, 20> {
     return Value;
   }
 
-  static ASTFileSignature create(StringRef Bytes) {
-    return create(Bytes.bytes_begin(), Bytes.bytes_end());
+  static ASTFileSignature create(std::array<uint8_t, 20> Bytes) {
+    return ASTFileSignature(std::move(Bytes));
   }
 
   static ASTFileSignature createDISentinel() {

diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp
index b6860619470d9..5fd4e8fb27cf8 100644
--- a/clang/lib/Serialization/ASTWriter.cpp
+++ b/clang/lib/Serialization/ASTWriter.cpp
@@ -1117,8 +1117,7 @@ std::pair<ASTFileSignature, ASTFileSignature>
 ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
   llvm::SHA1 Hasher;
   Hasher.update(ASTBlockBytes);
-  auto Hash = Hasher.result();
-  ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hash);
+  ASTFileSignature ASTBlockHash = ASTFileSignature::create(Hasher.result());
 
   // Add the remaining bytes (i.e. bytes before the unhashed control block that
   // are not part of the AST block).
@@ -1126,8 +1125,7 @@ ASTWriter::createSignature(StringRef AllBytes, StringRef ASTBlockBytes) {
       AllBytes.take_front(ASTBlockBytes.bytes_end() - AllBytes.bytes_begin()));
   Hasher.update(
       AllBytes.take_back(AllBytes.bytes_end() - ASTBlockBytes.bytes_end()));
-  Hash = Hasher.result();
-  ASTFileSignature Signature = ASTFileSignature::create(Hash);
+  ASTFileSignature Signature = ASTFileSignature::create(Hasher.result());
 
   return std::make_pair(ASTBlockHash, Signature);
 }

diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp
index e47c7e325a343..a4de385e04f00 100644
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1202,7 +1202,7 @@ void CodeSignatureSection::writeHashes(uint8_t *buf) const {
                     std::min(codeEnd - code, static_cast<ssize_t>(blockSize)));
     SHA256 hasher;
     hasher.update(block);
-    StringRef hash = hasher.final();
+    std::array<uint8_t, 32> hash = hasher.final();
     assert(hash.size() == hashSize);
     memcpy(hashes, hash.data(), hashSize);
     code += blockSize;

diff --git a/llvm/include/llvm/Support/BLAKE3.h b/llvm/include/llvm/Support/BLAKE3.h
index ade5201904655..7b30dbccd1734 100644
--- a/llvm/include/llvm/Support/BLAKE3.h
+++ b/llvm/include/llvm/Support/BLAKE3.h
@@ -34,7 +34,7 @@ namespace llvm {
 template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
 using BLAKE3Result = std::array<uint8_t, NumBytes>;
 
-/// A class that wrap the BLAKE3 algorithm.
+/// A class that wraps the BLAKE3 algorithm.
 class BLAKE3 {
 public:
   BLAKE3() { init(); }
@@ -70,6 +70,17 @@ class BLAKE3 {
     return Result;
   }
 
+  /// Return the current output for the digested data since the last call to
+  /// init().
+  ///
+  /// Other hash functions distinguish between \p result() and \p final(), with
+  /// \p result() allowing more calls into \p update(), but there's no
+  // difference for the BLAKE3 hash function.
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+  BLAKE3Result<NumBytes> result() {
+    return final<NumBytes>();
+  }
+
   /// Returns a BLAKE3 hash for the given data.
   template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
   static BLAKE3Result<NumBytes> hash(ArrayRef<uint8_t> Data) {
@@ -82,6 +93,32 @@ class BLAKE3 {
   llvm_blake3_hasher Hasher;
 };
 
+/// Like \p BLAKE3 but using a class-level template parameter for specifying the
+/// hash size of the \p final() and \p result() functions.
+///
+/// This is useful for using BLAKE3 as the hasher type for \p HashBuilder with
+/// non-default hash sizes.
+template <size_t NumBytes> class TruncatedBLAKE3 : public BLAKE3 {
+public:
+  /// Finalize the hasher and put the result in \p Result.
+  /// This doesn't modify the hasher itself, and it's possible to finalize again
+  /// after adding more input.
+  void final(BLAKE3Result<NumBytes> &Result) { return BLAKE3::final(Result); }
+
+  /// Finalize the hasher and return an output of any length, given in bytes.
+  /// This doesn't modify the hasher itself, and it's possible to finalize again
+  /// after adding more input.
+  BLAKE3Result<NumBytes> final() { return BLAKE3::final<NumBytes>(); }
+
+  /// Return the current output for the digested data since the last call to
+  /// init().
+  ///
+  /// Other hash functions distinguish between \p result() and \p final(), with
+  /// \p result() allowing more calls into \p update(), but there's no
+  // difference for the BLAKE3 hash function.
+  BLAKE3Result<NumBytes> result() { return BLAKE3::result<NumBytes>(); }
+};
+
 } // namespace llvm
 
 #endif

diff --git a/llvm/include/llvm/Support/HashBuilder.h b/llvm/include/llvm/Support/HashBuilder.h
index bf93a0d22da73..9d7680d2b6677 100644
--- a/llvm/include/llvm/Support/HashBuilder.h
+++ b/llvm/include/llvm/Support/HashBuilder.h
@@ -39,6 +39,9 @@ struct IsHashableData
 /// Declares the hasher member, and functions forwarding directly to the hasher.
 template <typename HasherT> class HashBuilderBase {
 public:
+  template <typename HasherT_ = HasherT>
+  using HashResultTy = decltype(std::declval<HasherT_ &>().final());
+
   HasherT &getHasher() { return Hasher; }
 
   /// Forward to `HasherT::update(ArrayRef<uint8_t>)`.
@@ -59,12 +62,12 @@ template <typename HasherT> class HashBuilderBase {
   }
 
   /// Forward to `HasherT::final()` if available.
-  template <typename HasherT_ = HasherT> StringRef final() {
+  template <typename HasherT_ = HasherT> HashResultTy<HasherT_> final() {
     return this->getHasher().final();
   }
 
   /// Forward to `HasherT::result()` if available.
-  template <typename HasherT_ = HasherT> StringRef result() {
+  template <typename HasherT_ = HasherT> HashResultTy<HasherT_> result() {
     return this->getHasher().result();
   }
 

diff --git a/llvm/include/llvm/Support/MD5.h b/llvm/include/llvm/Support/MD5.h
index 70d0466013461..fa2f477261dd9 100644
--- a/llvm/include/llvm/Support/MD5.h
+++ b/llvm/include/llvm/Support/MD5.h
@@ -40,26 +40,19 @@ template <typename T> class ArrayRef;
 
 class MD5 {
 public:
-  struct MD5Result {
-    std::array<uint8_t, 16> Bytes;
-
-    operator std::array<uint8_t, 16>() const { return Bytes; }
-
-    const uint8_t &operator[](size_t I) const { return Bytes[I]; }
-    uint8_t &operator[](size_t I) { return Bytes[I]; }
-
+  struct MD5Result : public std::array<uint8_t, 16> {
     SmallString<32> digest() const;
 
     uint64_t low() const {
       // Our MD5 implementation returns the result in little endian, so the low
       // word is first.
       using namespace support;
-      return endian::read<uint64_t, little, unaligned>(Bytes.data());
+      return endian::read<uint64_t, little, unaligned>(data());
     }
 
     uint64_t high() const {
       using namespace support;
-      return endian::read<uint64_t, little, unaligned>(Bytes.data() + 8);
+      return endian::read<uint64_t, little, unaligned>(data() + 8);
     }
     std::pair<uint64_t, uint64_t> words() const {
       using namespace support;
@@ -78,20 +71,20 @@ class MD5 {
   /// Finishes off the hash and puts the result in result.
   void final(MD5Result &Result);
 
-  /// Finishes off the hash, and returns a reference to the 16-byte hash data.
-  StringRef final();
+  /// Finishes off the hash, and returns the 16-byte hash data.
+  MD5Result final();
 
-  /// Finishes off the hash, and returns a reference to the 16-byte hash data.
+  /// Finishes off the hash, and returns the 16-byte hash data.
   /// This is suitable for getting the MD5 at any time without invalidating the
   /// internal state, so that more calls can be made into `update`.
-  StringRef result();
+  MD5Result result();
 
   /// Translates the bytes in \p Res to a hex string that is
   /// deposited into \p Str. The result will be of length 32.
   static void stringifyResult(MD5Result &Result, SmallVectorImpl<char> &Str);
 
   /// Computes the hash for a given bytes.
-  static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data);
+  static MD5Result hash(ArrayRef<uint8_t> Data);
 
 private:
   // Any 32-bit or wider unsigned integer data type will do.
@@ -109,15 +102,9 @@ class MD5 {
     MD5_u32plus block[16];
   } InternalState;
 
-  MD5Result Result;
-
   const uint8_t *body(ArrayRef<uint8_t> Data);
 };
 
-inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) {
-  return LHS.Bytes == RHS.Bytes;
-}
-
 /// Helper to compute and return lower 64 bits of the given string's MD5 hash.
 inline uint64_t MD5Hash(StringRef Str) {
   using namespace support;

diff --git a/llvm/include/llvm/Support/SHA1.h b/llvm/include/llvm/Support/SHA1.h
index efd8513cc201f..ae6d62aed723a 100644
--- a/llvm/include/llvm/Support/SHA1.h
+++ b/llvm/include/llvm/Support/SHA1.h
@@ -36,17 +36,17 @@ class SHA1 {
   /// Digest more data.
   void update(StringRef Str);
 
-  /// Return a reference to the current raw 160-bits SHA1 for the digested data
+  /// Return the current raw 160-bits SHA1 for the digested data
   /// since the last call to init(). This call will add data to the internal
   /// state and as such is not suited for getting an intermediate result
   /// (see result()).
-  StringRef final();
+  std::array<uint8_t, 20> final();
 
-  /// Return a reference to the current raw 160-bits SHA1 for the digested data
+  /// Return the current raw 160-bits SHA1 for the digested data
   /// since the last call to init(). This is suitable for getting the SHA1 at
   /// any time without invalidating the internal state so that more calls can be
   /// made into update.
-  StringRef result();
+  std::array<uint8_t, 20> result();
 
   /// Returns a raw 160-bit SHA1 hash for the given data.
   static std::array<uint8_t, 20> hash(ArrayRef<uint8_t> Data);
@@ -68,14 +68,13 @@ class SHA1 {
     uint8_t BufferOffset;
   } InternalState;
 
-  // Internal copy of the hash, populated and accessed on calls to result()
-  uint32_t HashResult[HASH_LENGTH / 4];
-
   // Helper
   void writebyte(uint8_t data);
   void hashBlock();
   void addUncounted(uint8_t data);
   void pad();
+
+  void final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult);
 };
 
 } // end llvm namespace

diff --git a/llvm/include/llvm/Support/SHA256.h b/llvm/include/llvm/Support/SHA256.h
index 9e295b0b9fae5..68b32c7b48348 100644
--- a/llvm/include/llvm/Support/SHA256.h
+++ b/llvm/include/llvm/Support/SHA256.h
@@ -43,17 +43,17 @@ class SHA256 {
   /// Digest more data.
   void update(StringRef Str);
 
-  /// Return a reference to the current raw 256-bits SHA256 for the digested
+  /// Return the current raw 256-bits SHA256 for the digested
   /// data since the last call to init(). This call will add data to the
   /// internal state and as such is not suited for getting an intermediate
   /// result (see result()).
-  StringRef final();
+  std::array<uint8_t, 32> final();
 
-  /// Return a reference to the current raw 256-bits SHA256 for the digested
+  /// Return the current raw 256-bits SHA256 for the digested
   /// data since the last call to init(). This is suitable for getting the
   /// SHA256 at any time without invalidating the internal state so that more
   /// calls can be made into update.
-  StringRef result();
+  std::array<uint8_t, 32> result();
 
   /// Returns a raw 256-bit SHA256 hash for the given data.
   static std::array<uint8_t, 32> hash(ArrayRef<uint8_t> Data);
@@ -75,14 +75,13 @@ class SHA256 {
     uint8_t BufferOffset;
   } InternalState;
 
-  // Internal copy of the hash, populated and accessed on calls to result()
-  uint32_t HashResult[HASH_LENGTH / 4];
-
   // Helper
   void writebyte(uint8_t data);
   void hashBlock();
   void addUncounted(uint8_t data);
   void pad();
+
+  void final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult);
 };
 
 } // namespace llvm

diff --git a/llvm/include/llvm/Support/raw_sha1_ostream.h b/llvm/include/llvm/Support/raw_sha1_ostream.h
index 3991691796b59..299f6e6b5e886 100644
--- a/llvm/include/llvm/Support/raw_sha1_ostream.h
+++ b/llvm/include/llvm/Support/raw_sha1_ostream.h
@@ -30,7 +30,7 @@ class raw_sha1_ostream : public raw_ostream {
 
 public:
   /// Return the current SHA1 hash for the content of the stream
-  StringRef sha1() {
+  std::array<uint8_t, 20> sha1() {
     flush();
     return State.result();
   }

diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index fdb07cf282a93..f291827d7f6f3 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -903,7 +903,7 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
         else {
           // Recompute the hash and compare it to the one in the bitcode
           SHA1 Hasher;
-          StringRef Hash;
+          std::array<uint8_t, 20> Hash;
           Hasher.update(*CheckHash);
           {
             int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
@@ -911,14 +911,14 @@ Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel,
             Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
             Hash = Hasher.result();
           }
-          std::array<char, 20> RecordedHash;
+          std::array<uint8_t, 20> RecordedHash;
           int Pos = 0;
           for (auto &Val : Record) {
             assert(!(Val >> 32) && "Unexpected high bits set");
             support::endian::write32be(&RecordedHash[Pos], Val);
             Pos += 4;
           }
-          if (Hash == StringRef(RecordedHash.data(), RecordedHash.size()))
+          if (Hash == RecordedHash)
             O->OS << " (match)";
           else
             O->OS << " (!mismatch!)";

diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 8a1652478aaed..c76294d5a6d9f 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -4387,7 +4387,7 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
     uint32_t Vals[5];
     Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos],
                                     Buffer.size() - BlockStartPos));
-    StringRef Hash = Hasher.result();
+    std::array<uint8_t, 20> Hash = Hasher.result();
     for (int Pos = 0; Pos < 20; Pos += 4) {
       Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos);
     }

diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 850a5e11f97dd..37ae84ad9bf17 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3536,6 +3536,6 @@ Optional<MD5::MD5Result> DwarfDebug::getMD5AsBytes(const DIFile *File) const {
   // An MD5 checksum is 16 bytes.
   std::string ChecksumString = fromHex(Checksum->Value);
   MD5::MD5Result CKMem;
-  std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+  std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.data());
   return CKMem;
 }

diff --git a/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp b/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
index 2dbc11a84f0b0..fc85d8186eaad 100644
--- a/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
+++ b/llvm/lib/DebugInfo/CodeView/TypeHashing.cpp
@@ -76,5 +76,6 @@ GloballyHashedType::hashType(ArrayRef<uint8_t> RecordData,
   auto TrailingBytes = RecordData.drop_front(Off);
   S.update(TrailingBytes);
 
-  return {S.final().take_back(8)};
+  std::array<uint8_t, 20> Hash = S.final();
+  return {ArrayRef<uint8_t>(Hash).take_back(8)};
 }

diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 3df22c0ec2395..81394e2b24b74 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -341,7 +341,7 @@ parseV5DirFileTables(const DWARFDataExtractor &DebugLineData,
               errc::invalid_argument,
               "failed to parse file entry because the MD5 hash is invalid");
         std::uninitialized_copy_n(Value.getAsBlock().getValue().begin(), 16,
-                                  FileEntry.Checksum.Bytes.begin());
+                                  FileEntry.Checksum.begin());
         break;
       default:
         break;

diff --git a/llvm/lib/MC/MCDwarf.cpp b/llvm/lib/MC/MCDwarf.cpp
index 2cb5a000f88a7..f2e92fe007a8a 100644
--- a/llvm/lib/MC/MCDwarf.cpp
+++ b/llvm/lib/MC/MCDwarf.cpp
@@ -387,8 +387,7 @@ static void emitOneV5FileEntry(MCStreamer *MCOS, const MCDwarfFile &DwarfFile,
   if (EmitMD5) {
     const MD5::MD5Result &Cksum = *DwarfFile.Checksum;
     MCOS->emitBinaryData(
-        StringRef(reinterpret_cast<const char *>(Cksum.Bytes.data()),
-                  Cksum.Bytes.size()));
+        StringRef(reinterpret_cast<const char *>(Cksum.data()), Cksum.size()));
   }
   if (HasSource) {
     if (LineStr)

diff --git a/llvm/lib/MC/MCParser/AsmParser.cpp b/llvm/lib/MC/MCParser/AsmParser.cpp
index 9632f00379bfd..defc86e06aa60 100644
--- a/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -3573,8 +3573,8 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
     if (HasMD5) {
       MD5::MD5Result Sum;
       for (unsigned i = 0; i != 8; ++i) {
-        Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
-        Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+        Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+        Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
       }
       CKMem = Sum;
     }

diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index aa4cc0271b97a..be478e2fac5d8 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -4907,8 +4907,8 @@ bool MasmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
     if (HasMD5) {
       MD5::MD5Result Sum;
       for (unsigned i = 0; i != 8; ++i) {
-        Sum.Bytes[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
-        Sum.Bytes[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
+        Sum[i] = uint8_t(MD5Hi >> ((7 - i) * 8));
+        Sum[i + 8] = uint8_t(MD5Lo >> ((7 - i) * 8));
       }
       CKMem = Sum;
     }

diff --git a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
index 2a2eda45db390..d5d7c0275d351 100644
--- a/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
+++ b/llvm/lib/ObjCopy/MachO/MachOWriter.cpp
@@ -570,7 +570,7 @@ void MachOWriter::writeCodeSignatureData() {
                              static_cast<ssize_t>(CodeSignature.BlockSize)));
     SHA256 Hasher;
     Hasher.update(Block);
-    StringRef Hash = Hasher.final();
+    std::array<uint8_t, 32> Hash = Hasher.final();
     assert(Hash.size() == CodeSignature.HashSize);
     memcpy(CurrHashWritePosition, Hash.data(), CodeSignature.HashSize);
     CurrHashReadPosition += CodeSignature.BlockSize;

diff --git a/llvm/lib/Support/BLAKE3/README.md b/llvm/lib/Support/BLAKE3/README.md
index c4a87c7806410..319a7514e8b50 100644
--- a/llvm/lib/Support/BLAKE3/README.md
+++ b/llvm/lib/Support/BLAKE3/README.md
@@ -152,7 +152,7 @@ template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
 void BLAKE3::final(BLAKE3Result<NumBytes> &Result);
 
 template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
-BLAKE3Result<NumBytes> final();
+BLAKE3Result<NumBytes> BLAKE3::final();
 ```
 ```c
 void llvm_blake3_hasher_finalize(

diff --git a/llvm/lib/Support/MD5.cpp b/llvm/lib/Support/MD5.cpp
index caadde3895043..fdcf34d70ad97 100644
--- a/llvm/lib/Support/MD5.cpp
+++ b/llvm/lib/Support/MD5.cpp
@@ -261,13 +261,13 @@ void MD5::final(MD5Result &Result) {
   support::endian::write32le(&Result[12], InternalState.d);
 }
 
-StringRef MD5::final() {
+MD5::MD5Result MD5::final() {
+  MD5Result Result;
   final(Result);
-  return StringRef(reinterpret_cast<char *>(Result.Bytes.data()),
-                   Result.Bytes.size());
+  return Result;
 }
 
-StringRef MD5::result() {
+MD5::MD5Result MD5::result() {
   auto StateToRestore = InternalState;
 
   auto Hash = final();
@@ -280,15 +280,15 @@ StringRef MD5::result() {
 
 SmallString<32> MD5::MD5Result::digest() const {
   SmallString<32> Str;
-  toHex(Bytes, /*LowerCase*/ true, Str);
+  toHex(*this, /*LowerCase*/ true, Str);
   return Str;
 }
 
 void MD5::stringifyResult(MD5Result &Result, SmallVectorImpl<char> &Str) {
-  toHex(Result.Bytes, /*LowerCase*/ true, Str);
+  toHex(Result, /*LowerCase*/ true, Str);
 }
 
-std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
+MD5::MD5Result MD5::hash(ArrayRef<uint8_t> Data) {
   MD5 Hash;
   Hash.update(Data);
   MD5::MD5Result Res;

diff --git a/llvm/lib/Support/SHA1.cpp b/llvm/lib/Support/SHA1.cpp
index 5dce44af9ecd8..52bae700350d6 100644
--- a/llvm/lib/Support/SHA1.cpp
+++ b/llvm/lib/Support/SHA1.cpp
@@ -263,7 +263,7 @@ void SHA1::pad() {
   addUncounted(InternalState.ByteCount << 3);
 }
 
-StringRef SHA1::final() {
+void SHA1::final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult) {
   // Pad to complete the last block
   pad();
 
@@ -281,12 +281,19 @@ StringRef SHA1::final() {
                     (((InternalState.State[i]) >> 24) & 0x000000ff);
   }
 #endif
+}
 
-  // Return pointer to hash (20 characters)
-  return StringRef((char *)HashResult, HASH_LENGTH);
+std::array<uint8_t, 20> SHA1::final() {
+  union {
+    std::array<uint32_t, HASH_LENGTH / 4> HashResult;
+    std::array<uint8_t, HASH_LENGTH> ReturnResult;
+  };
+  static_assert(sizeof(HashResult) == sizeof(ReturnResult), "");
+  final(HashResult);
+  return ReturnResult;
 }
 
-StringRef SHA1::result() {
+std::array<uint8_t, 20> SHA1::result() {
   auto StateToRestore = InternalState;
 
   auto Hash = final();
@@ -301,9 +308,5 @@ StringRef SHA1::result() {
 std::array<uint8_t, 20> SHA1::hash(ArrayRef<uint8_t> Data) {
   SHA1 Hash;
   Hash.update(Data);
-  StringRef S = Hash.final();
-
-  std::array<uint8_t, 20> Arr;
-  memcpy(Arr.data(), S.data(), S.size());
-  return Arr;
+  return Hash.final();
 }

diff --git a/llvm/lib/Support/SHA256.cpp b/llvm/lib/Support/SHA256.cpp
index 3b81506847ec8..81d897fb41874 100644
--- a/llvm/lib/Support/SHA256.cpp
+++ b/llvm/lib/Support/SHA256.cpp
@@ -243,7 +243,7 @@ void SHA256::pad() {
   addUncounted(len);
 }
 
-StringRef SHA256::final() {
+void SHA256::final(std::array<uint32_t, HASH_LENGTH / 4> &HashResult) {
   // Pad to complete the last block
   pad();
 
@@ -261,12 +261,19 @@ StringRef SHA256::final() {
                     (((InternalState.State[i]) >> 24) & 0x000000ff);
   }
 #endif
+}
 
-  // Return pointer to hash (32 characters)
-  return StringRef((char *)HashResult, HASH_LENGTH);
+std::array<uint8_t, 32> SHA256::final() {
+  union {
+    std::array<uint32_t, HASH_LENGTH / 4> HashResult;
+    std::array<uint8_t, HASH_LENGTH> ReturnResult;
+  };
+  static_assert(sizeof(HashResult) == sizeof(ReturnResult), "");
+  final(HashResult);
+  return ReturnResult;
 }
 
-StringRef SHA256::result() {
+std::array<uint8_t, 32> SHA256::result() {
   auto StateToRestore = InternalState;
 
   auto Hash = final();
@@ -281,11 +288,7 @@ StringRef SHA256::result() {
 std::array<uint8_t, 32> SHA256::hash(ArrayRef<uint8_t> Data) {
   SHA256 Hash;
   Hash.update(Data);
-  StringRef S = Hash.final();
-
-  std::array<uint8_t, 32> Arr;
-  memcpy(Arr.data(), S.data(), S.size());
-  return Arr;
+  return Hash.final();
 }
 
 } // namespace llvm

diff --git a/llvm/unittests/Support/BLAKE3Test.cpp b/llvm/unittests/Support/BLAKE3Test.cpp
index 197271a12804d..a729524112e32 100644
--- a/llvm/unittests/Support/BLAKE3Test.cpp
+++ b/llvm/unittests/Support/BLAKE3Test.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Support/BLAKE3.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/HashBuilder.h"
 #include "gtest/gtest.h"
 
 using namespace llvm;
@@ -59,6 +60,14 @@ TEST(BLAKE3Test, BLAKE3) {
   auto hashStr = toHex(hash);
   EXPECT_EQ(hashStr,
             "616F575A1B58D4C9797D4217B9730AE5E6EB319D76EDEF6549B46F4EFE31FF8B");
+
+  // Using generic HashBuilder.
+  HashBuilder<BLAKE3, support::endianness::native> HashBuilder;
+  HashBuilder.update(std::get<0>(testvectors[2]));
+  BLAKE3Result<> HBHash1 = HashBuilder.final();
+  BLAKE3Result<> HBHash2 = HashBuilder.result();
+  EXPECT_EQ(std::get<1>(testvectors[2]), toHex(HBHash1));
+  EXPECT_EQ(std::get<1>(testvectors[2]), toHex(HBHash2));
 }
 
 TEST(BLAKE3Test, SmallerHashSize) {
@@ -73,6 +82,14 @@ TEST(BLAKE3Test, SmallerHashSize) {
   auto hashStr2 = toHex(hash2);
   EXPECT_EQ(hashStr1, hashStr2);
   EXPECT_EQ(hashStr1, "6437B3AC38465133FFB63B75273A8DB5");
+
+  // Using generic HashBuilder.
+  HashBuilder<TruncatedBLAKE3<16>, support::endianness::native> HashBuilder;
+  HashBuilder.update(Input);
+  BLAKE3Result<16> hash3 = HashBuilder.final();
+  BLAKE3Result<16> hash4 = HashBuilder.result();
+  EXPECT_EQ(hashStr1, toHex(hash3));
+  EXPECT_EQ(hashStr1, toHex(hash4));
 }
 
 } // namespace

diff --git a/llvm/unittests/Support/HashBuilderTest.cpp b/llvm/unittests/Support/HashBuilderTest.cpp
index d644107bbcf18..26007442a3394 100644
--- a/llvm/unittests/Support/HashBuilderTest.cpp
+++ b/llvm/unittests/Support/HashBuilderTest.cpp
@@ -44,13 +44,15 @@ using HashBuilder = llvm::HashBuilder<typename HasherTAndEndianness::HasherT,
                                       HasherTAndEndianness::Endianness>;
 
 template <typename HasherTAndEndianness, typename... Ts>
-static std::string hashWithBuilder(const Ts &...Args) {
-  return HashBuilder<HasherTAndEndianness>().add(Args...).final().str();
+static typename HashBuilder<HasherTAndEndianness>::template HashResultTy<>
+hashWithBuilder(const Ts &...Args) {
+  return HashBuilder<HasherTAndEndianness>().add(Args...).final();
 }
 
 template <typename HasherTAndEndianness, typename... Ts>
-static std::string hashRangeWithBuilder(const Ts &...Args) {
-  return HashBuilder<HasherTAndEndianness>().addRange(Args...).final().str();
+static typename HashBuilder<HasherTAndEndianness>::template HashResultTy<>
+hashRangeWithBuilder(const Ts &...Args) {
+  return HashBuilder<HasherTAndEndianness>().addRange(Args...).final();
 }
 
 // All the test infrastructure relies on the variadic helpers. Test them first.
@@ -102,7 +104,7 @@ TYPED_TEST(HashBuilderTest, AddHashableData) {
     auto SwappedData = llvm::support::endian::byte_swap(Data, E);
     Hasher.update(llvm::makeArrayRef(
         reinterpret_cast<const uint8_t *>(&SwappedData), sizeof(Data)));
-    return static_cast<std::string>(Hasher.final());
+    return Hasher.final();
   };
 
   char C = 'c';

diff --git a/llvm/unittests/Support/MD5Test.cpp b/llvm/unittests/Support/MD5Test.cpp
index 8eb4d3422cb62..83c556cbc5bd9 100644
--- a/llvm/unittests/Support/MD5Test.cpp
+++ b/llvm/unittests/Support/MD5Test.cpp
@@ -62,7 +62,7 @@ TEST(MD5HashTest, MD5) {
   std::array<uint8_t, 16> Vec = MD5::hash(Input);
   MD5::MD5Result MD5Res;
   SmallString<32> Res;
-  memcpy(MD5Res.Bytes.data(), Vec.data(), Vec.size());
+  memcpy(MD5Res.data(), Vec.data(), Vec.size());
   MD5::stringifyResult(MD5Res, Res);
   EXPECT_EQ(Res, "c3fcd3d76192e4007dfb496cca67e13b");
   EXPECT_EQ(0x3be167ca6c49fb7dULL, MD5Res.high());
@@ -79,10 +79,7 @@ TEST(MD5Test, FinalAndResultHelpers) {
     ReferenceHash.update("abcd");
     MD5::MD5Result ReferenceResult;
     ReferenceHash.final(ReferenceResult);
-    StringRef ExpectedResult =
-        StringRef(reinterpret_cast<char *>(ReferenceResult.Bytes.data()),
-                  ReferenceResult.Bytes.size());
-    EXPECT_EQ(Hash.result(), ExpectedResult);
+    EXPECT_EQ(Hash.result(), ReferenceResult);
   }
 
   Hash.update("xyz");
@@ -93,10 +90,7 @@ TEST(MD5Test, FinalAndResultHelpers) {
     ReferenceHash.update("xyz");
     MD5::MD5Result ReferenceResult;
     ReferenceHash.final(ReferenceResult);
-    StringRef ExpectedResult =
-        StringRef(reinterpret_cast<char *>(ReferenceResult.Bytes.data()),
-                  ReferenceResult.Bytes.size());
-    EXPECT_EQ(Hash.final(), ExpectedResult);
+    EXPECT_EQ(Hash.final(), ReferenceResult);
   }
 }
 } // namespace

diff --git a/llvm/unittests/Support/SHA256.cpp b/llvm/unittests/Support/SHA256.cpp
index 82359a7177b9b..a50ce7ba64cd2 100644
--- a/llvm/unittests/Support/SHA256.cpp
+++ b/llvm/unittests/Support/SHA256.cpp
@@ -20,7 +20,7 @@ using namespace llvm;
 
 namespace {
 
-static std::string toHex(StringRef Input) {
+static std::string toHex(ArrayRef<uint8_t> Input) {
   static const char *const LUT = "0123456789abcdef";
   size_t Length = Input.size();
 

diff --git a/llvm/unittests/Support/raw_sha1_ostream_test.cpp b/llvm/unittests/Support/raw_sha1_ostream_test.cpp
index 3238d0d0e88ff..a3cb6f58d3e29 100644
--- a/llvm/unittests/Support/raw_sha1_ostream_test.cpp
+++ b/llvm/unittests/Support/raw_sha1_ostream_test.cpp
@@ -14,7 +14,7 @@
 
 using namespace llvm;
 
-static std::string toHex(StringRef Input) {
+static std::string toHex(ArrayRef<uint8_t> Input) {
   static const char *const LUT = "0123456789ABCDEF";
   size_t Length = Input.size();
 
@@ -39,7 +39,7 @@ TEST(raw_sha1_ostreamTest, Basic) {
 TEST(sha1_hash_test, Basic) {
   ArrayRef<uint8_t> Input((const uint8_t *)"Hello World!", 12);
   std::array<uint8_t, 20> Vec = SHA1::hash(Input);
-  std::string Hash = toHex({(const char *)Vec.data(), 20});
+  std::string Hash = toHex(Vec);
   ASSERT_EQ("2EF7BDE608CE5404E97D5F042F95F89F1C232871", Hash);
 }
 

diff --git a/mlir/lib/Pass/IRPrinting.cpp b/mlir/lib/Pass/IRPrinting.cpp
index 30d5a8e1a7798..c6bfa941a8632 100644
--- a/mlir/lib/Pass/IRPrinting.cpp
+++ b/mlir/lib/Pass/IRPrinting.cpp
@@ -66,7 +66,7 @@ class OperationFingerPrint {
         ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(&data), sizeof(T)));
   }
 
-  SmallString<20> hash;
+  std::array<uint8_t, 20> hash;
 };
 
 //===----------------------------------------------------------------------===//


        


More information about the llvm-commits mailing list