[llvm] r265107 - Add support for computing SHA1 in LLVM

Mehdi Amini via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 31 21:30:17 PDT 2016


Author: mehdi_amini
Date: Thu Mar 31 23:30:16 2016
New Revision: 265107

URL: http://llvm.org/viewvc/llvm-project?rev=265107&view=rev
Log:
Add support for computing SHA1 in LLVM

Provide a class to generate a SHA1 from a sequence of bytes, and
a convenience raw_ostream adaptor.
This will be used to provide a "build-id" by hashing the Module
block when writing bitcode. ThinLTO will use this information for
incremental builds.

Reapply r265094 which was reverted in r265102 because it broke
MSVC bots (constexpr is not supported).

http://reviews.llvm.org/D16325

From: Mehdi Amini <mehdi.amini at apple.com>

Added:
    llvm/trunk/include/llvm/Support/SHA1.h
    llvm/trunk/include/llvm/Support/raw_sha1_ostream.h
    llvm/trunk/lib/Support/SHA1.cpp
    llvm/trunk/unittests/Support/raw_sha1_ostream_test.cpp
Modified:
    llvm/trunk/lib/Support/CMakeLists.txt
    llvm/trunk/unittests/Support/CMakeLists.txt

Added: llvm/trunk/include/llvm/Support/SHA1.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SHA1.h?rev=265107&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Support/SHA1.h (added)
+++ llvm/trunk/include/llvm/Support/SHA1.h Thu Mar 31 23:30:16 2016
@@ -0,0 +1,75 @@
+//==- SHA1.h - SHA1 implementation for LLVM                     --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This code is taken from the public domain
+// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c)
+// and modified by wrapping it in a C++ interface for LLVM,
+// and removing unnecessary code.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_SHA1_H
+#define LLVM_SUPPORT_SHA1_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdint>
+
+namespace llvm {
+
+/// A class that wraps the SHA1 algorithm.
+class SHA1 {
+public:
+  SHA1() { init(); } // Start from the initial SHA1 state.
+
+  /// Reinitialize the internal state, discarding any data digested so far.
+  void init();
+
+  /// Digest more data.
+  void update(ArrayRef<uint8_t> Data);
+
+  /// Return a reference to the raw 160-bit SHA1 of the data digested since
+  /// the last call to init(). This call appends padding to the internal
+  /// state and as such is not suited for getting an intermediate result
+  /// (see result()). The returned StringRef points into this object.
+  StringRef final();
+
+  /// Return a reference to the raw 160-bit SHA1 of the data digested since
+  /// the last call to init(). This is suitable for getting the SHA1 at any
+  /// time: the internal state is saved and restored around the computation,
+  /// so more calls can be made into update().
+  StringRef result();
+
+private:
+  /// Block and digest sizes, in bytes.
+  /// "static constexpr" would be cleaner but MSVC does not support it yet.
+  enum { BLOCK_LENGTH = 64 };
+  enum { HASH_LENGTH = 20 };
+
+  // Internal state: block buffer, running hash, and byte/offset counters.
+  struct {
+    uint32_t Buffer[BLOCK_LENGTH / 4]; // Current 64-byte block, as 16 words.
+    uint32_t State[HASH_LENGTH / 4];   // Five-word running hash state.
+    uint32_t ByteCount;                // Bytes digested (32-bit counter).
+    uint8_t BufferOffset;              // Next byte position within Buffer.
+  } InternalState;
+
+  // Internal copy of the hash, populated by final() (and thus by result()).
+  uint32_t HashResult[HASH_LENGTH / 4];
+
+  // Helpers
+  void writebyte(uint8_t data);    // Count and buffer one message byte.
+  void hashBlock();                // Fold the buffered block into State.
+  void addUncounted(uint8_t data); // Buffer a byte without counting it.
+  void pad();                      // Append SHA-1 padding and length.
+};
+
+} // end llvm namespace
+
+#endif

Added: llvm/trunk/include/llvm/Support/raw_sha1_ostream.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/raw_sha1_ostream.h?rev=265107&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Support/raw_sha1_ostream.h (added)
+++ llvm/trunk/include/llvm/Support/raw_sha1_ostream.h Thu Mar 31 23:30:16 2016
@@ -0,0 +1,46 @@
+//==- raw_sha1_ostream.h - raw_ostream that computes SHA1       --*- C++ -*-==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//  This file defines the raw_sha1_ostream class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_RAW_SHA1_OSTREAM_H
+#define LLVM_SUPPORT_RAW_SHA1_OSTREAM_H
+
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SHA1.h"
+
+namespace llvm {
+
+/// A raw_ostream that hashes its content using the SHA1 algorithm.
+class raw_sha1_ostream : public raw_ostream {
+  SHA1 State;
+
+  /// See raw_ostream::write_impl. Feeds the written bytes to the hasher.
+  void write_impl(const char *Ptr, size_t Size) override {
+    State.update(ArrayRef<uint8_t>((uint8_t *)Ptr, Size));
+  }
+
+public:
+  /// Flush the stream, then return the current SHA1 hash of its content.
+  StringRef sha1() {
+    flush();
+    return State.result();
+  }
+
+  /// Reset the hash state. NOTE(review): no flush() here - confirm intended.
+  void resetHash() { State.init(); }
+
+  uint64_t current_pos() const override { return 0; } // Always reports 0.
+};
+
+} // end llvm namespace
+
+#endif

Modified: llvm/trunk/lib/Support/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/CMakeLists.txt?rev=265107&r1=265106&r2=265107&view=diff
==============================================================================
--- llvm/trunk/lib/Support/CMakeLists.txt (original)
+++ llvm/trunk/lib/Support/CMakeLists.txt Thu Mar 31 23:30:16 2016
@@ -75,6 +75,7 @@ add_llvm_library(LLVMSupport
   RandomNumberGenerator.cpp
   Regex.cpp
   ScaledNumber.cpp
+  SHA1.cpp
   SmallPtrSet.cpp
   SmallVector.cpp
   SourceMgr.cpp

Added: llvm/trunk/lib/Support/SHA1.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SHA1.cpp?rev=265107&view=auto
==============================================================================
--- llvm/trunk/lib/Support/SHA1.cpp (added)
+++ llvm/trunk/lib/Support/SHA1.cpp Thu Mar 31 23:30:16 2016
@@ -0,0 +1,168 @@
+//======- SHA1.cpp - Private copy of the SHA1 implementation -*- C++ -*-=====//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This code is taken from the public domain
+// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c)
+// and modified by wrapping it in a C++ interface for LLVM,
+// and removing unnecessary code.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/SHA1.h"
+using namespace llvm;
+
+#include <stdint.h>
+#include <string.h>
+
+#ifdef __BIG_ENDIAN__
+#define SHA_BIG_ENDIAN // Selects the no-byte-swap code paths below.
+#endif
+
+/* Additive constants, one per range of 20 rounds in hashBlock() */
+#define SHA1_K0 0x5a827999  // Rounds 0-19
+#define SHA1_K20 0x6ed9eba1 // Rounds 20-39
+#define SHA1_K40 0x8f1bbcdc // Rounds 40-59
+#define SHA1_K60 0xca62c1d6 // Rounds 60-79
+
+#define SEED_0 0x67452301 // SEED_0..SEED_4: initial State words set by init()
+#define SEED_1 0xefcdab89
+#define SEED_2 0x98badcfe
+#define SEED_3 0x10325476
+#define SEED_4 0xc3d2e1f0
+
+void SHA1::init() { // Reset the hasher to its starting state.
+  InternalState.State[0] = SEED_0; // Load the five initial state words.
+  InternalState.State[1] = SEED_1;
+  InternalState.State[2] = SEED_2;
+  InternalState.State[3] = SEED_3;
+  InternalState.State[4] = SEED_4;
+  InternalState.ByteCount = 0;    // No data digested yet.
+  InternalState.BufferOffset = 0; // Block buffer is empty.
+}
+
+static uint32_t rol32(uint32_t number, uint8_t bits) { // Rotate left by bits.
+  return ((number << bits) | (number >> (32 - bits))); // Needs 0 < bits < 32.
+}
+
+void SHA1::hashBlock() { // Run the 80 rounds over the buffered block.
+  uint8_t i;
+  uint32_t a, b, c, d, e, t;
+
+  a = InternalState.State[0]; // Working copies of the current state words.
+  b = InternalState.State[1];
+  c = InternalState.State[2];
+  d = InternalState.State[3];
+  e = InternalState.State[4];
+  for (i = 0; i < 80; i++) {
+    if (i >= 16) { // Derive word i in place; this overwrites the buffer.
+      t = InternalState.Buffer[(i + 13) & 15] ^
+          InternalState.Buffer[(i + 8) & 15] ^
+          InternalState.Buffer[(i + 2) & 15] ^ InternalState.Buffer[i & 15];
+      InternalState.Buffer[i & 15] = rol32(t, 1);
+    }
+    if (i < 20) { // Each range of 20 rounds has its own function + constant.
+      t = (d ^ (b & (c ^ d))) + SHA1_K0;
+    } else if (i < 40) {
+      t = (b ^ c ^ d) + SHA1_K20;
+    } else if (i < 60) {
+      t = ((b & c) | (d & (b | c))) + SHA1_K40;
+    } else {
+      t = (b ^ c ^ d) + SHA1_K60;
+    }
+    t += rol32(a, 5) + e + InternalState.Buffer[i & 15];
+    e = d; // Shift the working variables down by one position.
+    d = c;
+    c = rol32(b, 30);
+    b = a;
+    a = t;
+  }
+  InternalState.State[0] += a; // Add the round output back into the state.
+  InternalState.State[1] += b;
+  InternalState.State[2] += c;
+  InternalState.State[3] += d;
+  InternalState.State[4] += e;
+}
+
+void SHA1::addUncounted(uint8_t data) { // Buffer a byte; ByteCount untouched.
+  uint8_t *const b = (uint8_t *)InternalState.Buffer; // Byte view of Buffer.
+#ifdef SHA_BIG_ENDIAN
+  b[InternalState.BufferOffset] = data;
+#else
+  b[InternalState.BufferOffset ^ 3] = data; // Reverse bytes within each word.
+#endif
+  InternalState.BufferOffset++;
+  if (InternalState.BufferOffset == BLOCK_LENGTH) { // Block full: hash it.
+    hashBlock();
+    InternalState.BufferOffset = 0;
+  }
+}
+
+void SHA1::writebyte(uint8_t data) { // Count one message byte and buffer it.
+  ++InternalState.ByteCount; // Feeds the length field written by pad().
+  addUncounted(data);
+}
+
+void SHA1::update(ArrayRef<uint8_t> Data) { // Digest Data byte by byte.
+  for (auto &C : Data)
+    writebyte(C); // Each byte is counted towards the message length.
+}
+
+void SHA1::pad() {
+  // Implement SHA-1 padding (fips180-2 §5.1.1)
+
+  // Pad with 0x80 followed by 0x00 until only 8 bytes remain in the block
+  addUncounted(0x80);
+  while (InternalState.BufferOffset != 56)
+    addUncounted(0x00);
+
+  // Append the length in bits in the last 8 bytes, high byte first
+  addUncounted(0); // ByteCount is only 32 bits wide,
+  addUncounted(0); // but SHA-1 stores a 64-bit length,
+  addUncounted(0); // so the top three bytes are always zero.
+  addUncounted(InternalState.ByteCount >> 29); // Bit length = ByteCount * 8,
+  addUncounted(InternalState.ByteCount >>
+               21); // so each byte of it is ByteCount shifted
+  addUncounted(InternalState.ByteCount >> 13); // by 3 less than usual.
+  addUncounted(InternalState.ByteCount >> 5);
+  addUncounted(InternalState.ByteCount << 3); // Fills the block: hashes it.
+}
+
+StringRef SHA1::final() { // Finish the hash and expose it via HashResult.
+  // Pad to complete the last block; this modifies InternalState
+  pad();
+
+#ifdef SHA_BIG_ENDIAN
+  // Just copy the current state
+  for (int i = 0; i < 5; i++) {
+    HashResult[i] = InternalState.State[i];
+  }
+#else
+  // Reverse the byte order of each state word while copying it out
+  for (int i = 0; i < 5; i++) {
+    HashResult[i] = (((InternalState.State[i]) << 24) & 0xff000000) |
+                    (((InternalState.State[i]) << 8) & 0x00ff0000) |
+                    (((InternalState.State[i]) >> 8) & 0x0000ff00) |
+                    (((InternalState.State[i]) >> 24) & 0x000000ff);
+  }
+#endif
+
+  // Return a view of the hash (20 bytes) stored in the HashResult member
+  return StringRef((char *)HashResult, HASH_LENGTH);
+}
+
+StringRef SHA1::result() { // Hash so far, leaving InternalState unchanged.
+  auto StateToRestore = InternalState; // Snapshot before final() pads.
+
+  auto Hash = final();
+
+  // Restore the state so that update() can keep digesting
+  InternalState = StateToRestore;
+
+  // Return the view of the hash (20 bytes); HashResult is not restored
+  return Hash;
+}

Modified: llvm/trunk/unittests/Support/CMakeLists.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/CMakeLists.txt?rev=265107&r1=265106&r2=265107&view=diff
==============================================================================
--- llvm/trunk/unittests/Support/CMakeLists.txt (original)
+++ llvm/trunk/unittests/Support/CMakeLists.txt Thu Mar 31 23:30:16 2016
@@ -52,6 +52,7 @@ add_llvm_unittest(SupportTests
   formatted_raw_ostream_test.cpp
   raw_ostream_test.cpp
   raw_pwrite_stream_test.cpp
+  raw_sha1_ostream_test.cpp
   )
 
 # ManagedStatic.cpp uses <pthread>.

Added: llvm/trunk/unittests/Support/raw_sha1_ostream_test.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/Support/raw_sha1_ostream_test.cpp?rev=265107&view=auto
==============================================================================
--- llvm/trunk/unittests/Support/raw_sha1_ostream_test.cpp (added)
+++ llvm/trunk/unittests/Support/raw_sha1_ostream_test.cpp Thu Mar 31 23:30:16 2016
@@ -0,0 +1,72 @@
+//===- llvm/unittest/Support/raw_sha1_ostream_test.cpp - SHA1 stream tests ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_sha1_ostream.h"
+
+#include <string>
+
+using namespace llvm;
+
+static std::string toHex(StringRef Input) { // Upper-case hex of each byte.
+  static const char *const LUT = "0123456789ABCDEF";
+  size_t Length = Input.size();
+
+  std::string Output;
+  Output.reserve(2 * Length); // Two hex digits per input byte.
+  for (size_t i = 0; i < Length; ++i) {
+    const unsigned char c = Input[i];
+    Output.push_back(LUT[c >> 4]); // High nibble first,
+    Output.push_back(LUT[c & 15]); // then the low nibble.
+  }
+  return Output;
+}
+
+TEST(raw_sha1_ostreamTest, Basic) { // One write, then check the hex digest.
+  llvm::raw_sha1_ostream Sha1Stream;
+  Sha1Stream << "Hello World!";
+  auto Hash = toHex(Sha1Stream.sha1());
+
+  ASSERT_EQ("2EF7BDE608CE5404E97D5F042F95F89F1C232871", Hash);
+}
+
+// Check that requesting an intermediate hash in the middle of the stream does
+// not change the hash obtained once the rest of the data has been written.
+TEST(raw_sha1_ostreamTest, Intermediate) {
+  llvm::raw_sha1_ostream Sha1Stream;
+  Sha1Stream << "Hello";
+  auto Hash = toHex(Sha1Stream.sha1()); // Intermediate hash of the prefix.
+
+  ASSERT_EQ("F7FF9E8B7BB2E09B70935A5D785E0CC5D9D0ABF0", Hash);
+  Sha1Stream << " World!";
+  Hash = toHex(Sha1Stream.sha1()); // Hash of the full, split-up input.
+
+  // Compute the non-split hash separately as a reference.
+  llvm::raw_sha1_ostream NonSplitSha1Stream;
+  NonSplitSha1Stream << "Hello World!";
+  auto NonSplitHash = toHex(NonSplitSha1Stream.sha1());
+
+  ASSERT_EQ(NonSplitHash, Hash);
+}
+
+TEST(raw_sha1_ostreamTest, Reset) { // Hash, reset, then hash new data.
+  llvm::raw_sha1_ostream Sha1Stream;
+  Sha1Stream << "Hello";
+  auto Hash = toHex(Sha1Stream.sha1()); // sha1() also flushes the stream.
+
+  ASSERT_EQ("F7FF9E8B7BB2E09B70935A5D785E0CC5D9D0ABF0", Hash);
+
+  Sha1Stream.resetHash(); // Reinitializes the underlying SHA1 state.
+  Sha1Stream << " World!";
+  Hash = toHex(Sha1Stream.sha1());
+
+  ASSERT_EQ("7447F2A5A42185C8CF91E632789C431830B59067", Hash);
+}




More information about the llvm-commits mailing list