[llvm] r298322 - Add a function to MD5 a file's contents.

Zachary Turner via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 20 16:33:19 PDT 2017


Author: zturner
Date: Mon Mar 20 18:33:18 2017
New Revision: 298322

URL: http://llvm.org/viewvc/llvm-project?rev=298322&view=rev
Log:
Add a function to MD5 a file's contents.

In doing so, clean up the MD5 interface a little.  Most
existing users only care about the lower 8 bytes of an MD5,
but for some users that care about the upper and lower,
there wasn't a good interface.  Furthermore, consumers
of the MD5 checksum were required to handle endianness
details on their own, so it seems reasonable to abstract
this into a nicer interface that just gives you the right
value.

Differential Revision: https://reviews.llvm.org/D31105

Modified:
    llvm/trunk/include/llvm/Support/FileSystem.h
    llvm/trunk/include/llvm/Support/MD5.h
    llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp
    llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
    llvm/trunk/lib/Support/MD5.cpp
    llvm/trunk/lib/Support/Path.cpp
    llvm/trunk/unittests/Support/MD5Test.cpp
    llvm/trunk/unittests/Support/Path.cpp

Modified: llvm/trunk/include/llvm/Support/FileSystem.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/FileSystem.h?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/FileSystem.h (original)
+++ llvm/trunk/include/llvm/Support/FileSystem.h Mon Mar 20 18:33:18 2017
@@ -33,6 +33,7 @@
 #include "llvm/Support/Chrono.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ErrorOr.h"
+#include "llvm/Support/MD5.h"
 #include <cassert>
 #include <cstdint>
 #include <ctime>
@@ -399,6 +400,16 @@ std::error_code copy_file(const Twine &F
 ///          platform-specific error_code.
 std::error_code resize_file(int FD, uint64_t Size);
 
+/// @brief Compute an MD5 hash of a file's contents.
+///
+/// @param FD Input file descriptor.
+/// @returns An MD5Result with the hash computed, if successful, otherwise a
+///          std::error_code.
+ErrorOr<MD5::MD5Result> md5_contents(int FD);
+
+/// @brief Version of compute_md5 that doesn't require an open file descriptor.
+ErrorOr<MD5::MD5Result> md5_contents(const Twine &Path);
+
 /// @}
 /// @name Physical Observers
 /// @{

Modified: llvm/trunk/include/llvm/Support/MD5.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/MD5.h?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/MD5.h (original)
+++ llvm/trunk/include/llvm/Support/MD5.h Mon Mar 20 18:33:18 2017
@@ -52,7 +52,32 @@ class MD5 {
   MD5_u32plus block[16];
 
 public:
-  typedef uint8_t MD5Result[16];
+  struct MD5Result {
+    std::array<uint8_t, 16> Bytes;
+
+    operator std::array<uint8_t, 16>() const { return Bytes; }
+
+    const uint8_t &operator[](size_t I) const { return Bytes[I]; }
+    uint8_t &operator[](size_t I) { return Bytes[I]; }
+
+    SmallString<32> digest() const;
+
+    uint64_t low() const {
+      // Our MD5 implementation returns the result in little endian, so the low
+      // word is first.
+      using namespace support;
+      return endian::read<uint64_t, little, unaligned>(Bytes.data());
+    }
+
+    uint64_t high() const {
+      using namespace support;
+      return endian::read<uint64_t, little, unaligned>(Bytes.data() + 8);
+    }
+    std::pair<uint64_t, uint64_t> words() const {
+      using namespace support;
+      return std::make_pair(high(), low());
+    }
+  };
 
   MD5();
 
@@ -76,6 +101,10 @@ private:
   const uint8_t *body(ArrayRef<uint8_t> Data);
 };
 
+inline bool operator==(const MD5::MD5Result &LHS, const MD5::MD5Result &RHS) {
+  return LHS.Bytes == RHS.Bytes;
+}
+
 /// Helper to compute and return lower 64 bits of the given string's MD5 hash.
 inline uint64_t MD5Hash(StringRef Str) {
   using namespace support;
@@ -84,9 +113,8 @@ inline uint64_t MD5Hash(StringRef Str) {
   Hash.update(Str);
   MD5::MD5Result Result;
   Hash.final(Result);
-  // Return the least significant 8 bytes. Our MD5 implementation returns the
-  // result in little endian, so we may need to swap bytes.
-  return endian::read<uint64_t, little, unaligned>(Result);
+  // Return the least significant word.
+  return Result.low();
 }
 
 } // end namespace llvm

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DIEHash.cpp Mon Mar 20 18:33:18 2017
@@ -490,9 +490,9 @@ uint64_t DIEHash::computeCUSignature(con
   Hash.final(Result);
 
   // ... take the least significant 8 bytes and return those. Our MD5
-  // implementation always returns its results in little endian, swap bytes
-  // appropriately.
-  return support::endian::read64le(Result + 8);
+  // implementation always returns its results in little endian, so we actually
+  // need the "high" word.
+  return Result.high();
 }
 
 /// This is based on the type signature computation given in section 7.27 of the
@@ -514,7 +514,7 @@ uint64_t DIEHash::computeTypeSignature(c
   Hash.final(Result);
 
   // ... take the least significant 8 bytes and return those. Our MD5
-  // implementation always returns its results in little endian, swap bytes
-  // appropriately.
-  return support::endian::read64le(Result + 8);
+  // implementation always returns its results in little endian, so we actually
+  // need the "high" word.
+  return Result.high();
 }

Modified: llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (original)
+++ llvm/trunk/lib/CodeGen/AsmPrinter/DwarfDebug.cpp Mon Mar 20 18:33:18 2017
@@ -39,7 +39,6 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Dwarf.h"
-#include "llvm/Support/Endian.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/LEB128.h"
@@ -1945,11 +1944,11 @@ uint64_t DwarfDebug::makeTypeSignature(S
   MD5 Hash;
   Hash.update(Identifier);
   // ... take the least significant 8 bytes and return those. Our MD5
-  // implementation always returns its results in little endian, swap bytes
-  // appropriately.
+  // implementation always returns its results in little endian, so we actually
+  // need the "high" word.
   MD5::MD5Result Result;
   Hash.final(Result);
-  return support::endian::read64le(Result + 8);
+  return Result.high();
 }
 
 void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,

Modified: llvm/trunk/lib/Support/MD5.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/MD5.cpp?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/lib/Support/MD5.cpp (original)
+++ llvm/trunk/lib/Support/MD5.cpp Mon Mar 20 18:33:18 2017
@@ -261,10 +261,16 @@ void MD5::final(MD5Result &Result) {
   support::endian::write32le(&Result[12], d);
 }
 
-void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
+SmallString<32> MD5::MD5Result::digest() const {
+  SmallString<32> Str;
   raw_svector_ostream Res(Str);
   for (int i = 0; i < 16; ++i)
-    Res << format("%.2x", Result[i]);
+    Res << format("%.2x", Bytes[i]);
+  return Str;
+}
+
+void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
+  Str = Result.digest();
 }
 
 std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
@@ -273,7 +279,5 @@ std::array<uint8_t, 16> MD5::hash(ArrayR
   MD5::MD5Result Res;
   Hash.final(Res);
 
-  std::array<uint8_t, 16> Arr;
-  memcpy(Arr.data(), Res, sizeof(Res));
-  return Arr;
+  return Res;
 }

Modified: llvm/trunk/lib/Support/Path.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Path.cpp?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Path.cpp (original)
+++ llvm/trunk/lib/Support/Path.cpp Mon Mar 20 18:33:18 2017
@@ -11,13 +11,14 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/Path.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/Support/COFF.h"
-#include "llvm/Support/MachO.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Errc.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Path.h"
+#include "llvm/Support/MachO.h"
 #include "llvm/Support/Process.h"
 #include <cctype>
 #include <cstring>
@@ -924,6 +925,36 @@ std::error_code copy_file(const Twine &F
   return std::error_code();
 }
 
+ErrorOr<MD5::MD5Result> md5_contents(int FD) {
+  MD5 Hash;
+
+  constexpr size_t BufSize = 4096;
+  std::vector<uint8_t> Buf(BufSize);
+  int BytesRead = 0;
+  for (;;) {
+    BytesRead = read(FD, Buf.data(), BufSize);
+    if (BytesRead <= 0)
+      break;
+    Hash.update(makeArrayRef(Buf.data(), BytesRead));
+  }
+
+  if (BytesRead < 0)
+    return std::error_code(errno, std::generic_category());
+  MD5::MD5Result Result;
+  Hash.final(Result);
+  return Result;
+}
+
+ErrorOr<MD5::MD5Result> md5_contents(const Twine &Path) {
+  int FD;
+  if (auto EC = openFileForRead(Path, FD))
+    return EC;
+
+  auto Result = md5_contents(FD);
+  close(FD);
+  return Result;
+}
+
 bool exists(file_status status) {
   return status_known(status) && status.type() != file_type::file_not_found;
 }

Modified: llvm/trunk/unittests/Support/MD5Test.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/MD5Test.cpp?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/MD5Test.cpp (original)
+++ llvm/trunk/unittests/Support/MD5Test.cpp Mon Mar 20 18:33:18 2017
@@ -63,8 +63,10 @@ TEST(MD5HashTest, MD5) {
   std::array<uint8_t, 16> Vec = MD5::hash(Input);
   MD5::MD5Result MD5Res;
   SmallString<32> Res;
-  memcpy(MD5Res, Vec.data(), Vec.size());
+  memcpy(MD5Res.Bytes.data(), Vec.data(), Vec.size());
   MD5::stringifyResult(MD5Res, Res);
   EXPECT_EQ(Res, "c3fcd3d76192e4007dfb496cca67e13b");
+  EXPECT_EQ(0x3be167ca6c49fb7dULL, MD5Res.high());
+  EXPECT_EQ(0x00e49261d7d3fcc3ULL, MD5Res.low());
 }
 }

Modified: llvm/trunk/unittests/Support/Path.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/Path.cpp?rev=298322&r1=298321&r2=298322&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/Path.cpp (original)
+++ llvm/trunk/unittests/Support/Path.cpp Mon Mar 20 18:33:18 2017
@@ -1011,6 +1011,20 @@ TEST_F(FileSystemTest, Resize) {
   ASSERT_NO_ERROR(fs::remove(TempPath));
 }
 
+TEST_F(FileSystemTest, MD5) {
+  int FD;
+  SmallString<64> TempPath;
+  ASSERT_NO_ERROR(fs::createTemporaryFile("prefix", "temp", FD, TempPath));
+  StringRef Data("abcdefghijklmnopqrstuvwxyz");
+  write(FD, Data.data(), Data.size());
+  lseek(FD, 0, SEEK_SET);
+  auto Hash = fs::md5_contents(FD);
+  ::close(FD);
+  ASSERT_NO_ERROR(Hash.getError());
+
+  EXPECT_STREQ("c3fcd3d76192e4007dfb496cca67e13b", Hash->digest().c_str());
+}
+
 TEST_F(FileSystemTest, FileMapping) {
   // Create a temp file.
   int FileDescriptor;




More information about the llvm-commits mailing list