[llvm] r287473 - SHA1: unroll loop in hashBlock.
Rui Ueyama via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 19 17:25:46 PST 2016
Thanks. Fixed in r287475.
On Sat, Nov 19, 2016 at 5:15 PM, Davide Italiano <davide at freebsd.org> wrote:
> On Sat, Nov 19, 2016 at 5:03 PM, Rui Ueyama via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
> > Author: ruiu
> > Date: Sat Nov 19 19:03:22 2016
> > New Revision: 287473
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=287473&view=rev
> > Log:
> > SHA1: unroll loop in hashBlock.
> >
> > This code is taken from public domain.
> > https://github.com/jsonn/src/blob/trunk/common/lib/libc/hash/sha1/sha1.c
> >
> > I wrote a sha1 command and ran it on my Xeon E5-2680 v2 2.80GHz machine.
> > Here is a result. The new hash function is 37% faster than before.
> >
> > Performance counter stats for './llvm-sha1-old /ssd/build/bin/lld' (10 runs):
> >
> > 6640.503687 task-clock (msec) # 1.001 CPUs utilized ( +- 0.03% )
> > 54 context-switches # 0.008 K/sec ( +- 5.03% )
> > 5 cpu-migrations # 0.001 K/sec ( +- 31.73% )
> > 183,803 page-faults # 0.028 M/sec ( +- 0.00% )
> > 18,527,954,113 cycles # 2.790 GHz ( +- 0.03% )
> > 4,993,237,485 stalled-cycles-frontend # 26.95% frontend cycles idle ( +- 0.11% )
> > <not supported> stalled-cycles-backend
> > 50,217,149,423 instructions # 2.71 insns per cycle
> > # 0.10 stalled cycles per insn ( +- 0.00% )
> > 6,094,322,337 branches # 917.750 M/sec ( +- 0.00% )
> > 11,778,239 branch-misses # 0.19% of all branches ( +- 0.01% )
> >
> > 6.634017401 seconds time elapsed ( +- 0.03% )
> >
> > Performance counter stats for './llvm-sha1-new /ssd/build/bin/lld' (10 runs):
> >
> > 4167.062720 task-clock (msec) # 1.001 CPUs utilized ( +- 0.02% )
> > 52 context-switches # 0.012 K/sec ( +- 16.45% )
> > 7 cpu-migrations # 0.002 K/sec ( +- 32.20% )
> > 183,804 page-faults # 0.044 M/sec ( +- 0.00% )
> > 11,626,611,958 cycles # 2.790 GHz ( +- 0.02% )
> > 4,491,897,976 stalled-cycles-frontend # 38.63% frontend cycles idle ( +- 0.05% )
> > <not supported> stalled-cycles-backend
> > 24,320,180,617 instructions # 2.09 insns per cycle
> > # 0.18 stalled cycles per insn ( +- 0.00% )
> > 1,574,674,576 branches # 377.886 M/sec ( +- 0.00% )
> > 11,769,693 branch-misses # 0.75% of all branches ( +- 0.00% )
> >
> > 4.163251552 seconds time elapsed ( +- 0.02% )
> >
> > Differential Revision: https://reviews.llvm.org/D26890
> >
> > Modified:
> > llvm/trunk/include/llvm/Support/SHA1.h
> > llvm/trunk/lib/Support/SHA1.cpp
> >
> > Modified: llvm/trunk/include/llvm/Support/SHA1.h
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/SHA1.h?rev=287473&r1=287472&r2=287473&view=diff
> > ==============================================================================
> > --- llvm/trunk/include/llvm/Support/SHA1.h (original)
> > +++ llvm/trunk/include/llvm/Support/SHA1.h Sat Nov 19 19:03:22 2016
> > @@ -61,7 +61,10 @@ private:
> >
> > // Internal State
> > struct {
> > - uint32_t Buffer[BLOCK_LENGTH / 4];
> > + union {
> > + uint8_t C[BLOCK_LENGTH];
> > + uint32_t L[BLOCK_LENGTH / 4];
> > + } Buffer;
> > uint32_t State[HASH_LENGTH / 4];
> > uint32_t ByteCount;
> > uint8_t BufferOffset;
> >
> > Modified: llvm/trunk/lib/Support/SHA1.cpp
> > URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/SHA1.cpp?rev=287473&r1=287472&r2=287473&view=diff
> > ==============================================================================
> > --- llvm/trunk/lib/Support/SHA1.cpp (original)
> > +++ llvm/trunk/lib/Support/SHA1.cpp Sat Nov 19 19:03:22 2016
> > @@ -6,8 +6,10 @@
> > // License. See LICENSE.TXT for details.
> > //
> > //===----------------------------------------------------------------------===//
> > +//
> > // This code is taken from public domain
> > -// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c)
> > +// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c and
> > +// http://cvsweb.netbsd.org/bsdweb.cgi/src/common/lib/libc/hash/sha1/sha1.c?rev=1.6)
> > // and modified by wrapping it in a C++ interface for LLVM,
> > // and removing unnecessary code.
> > //
> > @@ -25,6 +27,56 @@ using namespace llvm;
> > #define SHA_BIG_ENDIAN
> > #endif
> >
> > +static uint32_t rol(uint32_t number, int bits) {
> > + return (number << bits) | (number >> (32 - bits));
> > +};
>
> Nit: number/bits should start with an uppercase letter (Number/Bits), per
> the LLVM naming convention and for consistency with the rest of the file.
>
> > +
> > +#if SHA_BIG_ENDIAN
> > +static uint32_t blk0(uint32_t *Buf, int I) {
> > + Buf[I] = (rol(Buf[I], 24) & 0xFF00FF00) | (rol(Buf[I], 8) &
> 0x00FF00FF);
> > + return Buf[I];
> > +}
> > +#else
> > +static uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
> > +#endif
> > +
> > +static uint32_t blk(uint32_t *Buf, int I) {
> > + Buf[I & 15] = rol(Buf[(I + 13) & 15] ^ Buf[(I + 8) & 15] ^ Buf[(I +
> 2) & 15] ^
> > + Buf[I & 15],
> > + 1);
> > + return Buf[I & 15];
> > +}
> > +
> > +static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
> uint32_t &E,
> > + int I, uint32_t *Buf) {
> > + E += ((B & (C ^ D)) ^ D) + blk0(Buf, I) + 0x5A827999 + rol(A, 5);
> > + B = rol(B, 30);
> > +}
> > +
> > +static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
> uint32_t &E,
> > + int I, uint32_t *Buf) {
> > + E += ((B & (C ^ D)) ^ D) + blk(Buf, I) + 0x5A827999 + rol(A, 5);
> > + B = rol(B, 30);
> > +}
> > +
> > +static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
> uint32_t &E,
> > + int I, uint32_t *Buf) {
> > + E += (B ^ C ^ D) + blk(Buf, I) + 0x6ED9EBA1 + rol(A, 5);
> > + B = rol(B, 30);
> > +}
> > +
> > +static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
> uint32_t &E,
> > + int I, uint32_t *Buf) {
> > + E += (((B | C) & D) | (B & C)) + blk(Buf, I) + 0x8F1BBCDC + rol(A, 5);
> > + B = rol(B, 30);
> > +}
> > +
> > +static void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D,
> uint32_t &E,
> > + int I, uint32_t *Buf) {
> > + E += (B ^ C ^ D) + blk(Buf, I) + 0xCA62C1D6 + rol(A, 5);
> > + B = rol(B, 30);
> > +}
> > +
> > /* code */
> > #define SHA1_K0 0x5a827999
> > #define SHA1_K20 0x6ed9eba1
> > @@ -47,56 +99,112 @@ void SHA1::init() {
> > InternalState.BufferOffset = 0;
> > }
> >
> > -static uint32_t rol32(uint32_t number, uint8_t bits) {
> > - return ((number << bits) | (number >> (32 - bits)));
> > -}
> > -
> > void SHA1::hashBlock() {
> > - uint8_t i;
> > - uint32_t a, b, c, d, e, t;
> > -
> > - a = InternalState.State[0];
> > - b = InternalState.State[1];
> > - c = InternalState.State[2];
> > - d = InternalState.State[3];
> > - e = InternalState.State[4];
> > - for (i = 0; i < 80; i++) {
> > - if (i >= 16) {
> > - t = InternalState.Buffer[(i + 13) & 15] ^
> > - InternalState.Buffer[(i + 8) & 15] ^
> > - InternalState.Buffer[(i + 2) & 15] ^ InternalState.Buffer[i &
> 15];
> > - InternalState.Buffer[i & 15] = rol32(t, 1);
> > - }
> > - if (i < 20) {
> > - t = (d ^ (b & (c ^ d))) + SHA1_K0;
> > - } else if (i < 40) {
> > - t = (b ^ c ^ d) + SHA1_K20;
> > - } else if (i < 60) {
> > - t = ((b & c) | (d & (b | c))) + SHA1_K40;
> > - } else {
> > - t = (b ^ c ^ d) + SHA1_K60;
> > - }
> > - t += rol32(a, 5) + e + InternalState.Buffer[i & 15];
> > - e = d;
> > - d = c;
> > - c = rol32(b, 30);
> > - b = a;
> > - a = t;
> > - }
> > - InternalState.State[0] += a;
> > - InternalState.State[1] += b;
> > - InternalState.State[2] += c;
> > - InternalState.State[3] += d;
> > - InternalState.State[4] += e;
> > + uint32_t A = InternalState.State[0];
> > + uint32_t B = InternalState.State[1];
> > + uint32_t C = InternalState.State[2];
> > + uint32_t D = InternalState.State[3];
> > + uint32_t E = InternalState.State[4];
> > +
> > + // 4 rounds of 20 operations each. Loop unrolled.
> > + r0(A, B, C, D, E, 0, InternalState.Buffer.L);
> > + r0(E, A, B, C, D, 1, InternalState.Buffer.L);
> > + r0(D, E, A, B, C, 2, InternalState.Buffer.L);
> > + r0(C, D, E, A, B, 3, InternalState.Buffer.L);
> > + r0(B, C, D, E, A, 4, InternalState.Buffer.L);
> > + r0(A, B, C, D, E, 5, InternalState.Buffer.L);
> > + r0(E, A, B, C, D, 6, InternalState.Buffer.L);
> > + r0(D, E, A, B, C, 7, InternalState.Buffer.L);
> > + r0(C, D, E, A, B, 8, InternalState.Buffer.L);
> > + r0(B, C, D, E, A, 9, InternalState.Buffer.L);
> > + r0(A, B, C, D, E, 10, InternalState.Buffer.L);
> > + r0(E, A, B, C, D, 11, InternalState.Buffer.L);
> > + r0(D, E, A, B, C, 12, InternalState.Buffer.L);
> > + r0(C, D, E, A, B, 13, InternalState.Buffer.L);
> > + r0(B, C, D, E, A, 14, InternalState.Buffer.L);
> > + r0(A, B, C, D, E, 15, InternalState.Buffer.L);
> > + r1(E, A, B, C, D, 16, InternalState.Buffer.L);
> > + r1(D, E, A, B, C, 17, InternalState.Buffer.L);
> > + r1(C, D, E, A, B, 18, InternalState.Buffer.L);
> > + r1(B, C, D, E, A, 19, InternalState.Buffer.L);
> > +
> > + r2(A, B, C, D, E, 20, InternalState.Buffer.L);
> > + r2(E, A, B, C, D, 21, InternalState.Buffer.L);
> > + r2(D, E, A, B, C, 22, InternalState.Buffer.L);
> > + r2(C, D, E, A, B, 23, InternalState.Buffer.L);
> > + r2(B, C, D, E, A, 24, InternalState.Buffer.L);
> > + r2(A, B, C, D, E, 25, InternalState.Buffer.L);
> > + r2(E, A, B, C, D, 26, InternalState.Buffer.L);
> > + r2(D, E, A, B, C, 27, InternalState.Buffer.L);
> > + r2(C, D, E, A, B, 28, InternalState.Buffer.L);
> > + r2(B, C, D, E, A, 29, InternalState.Buffer.L);
> > + r2(A, B, C, D, E, 30, InternalState.Buffer.L);
> > + r2(E, A, B, C, D, 31, InternalState.Buffer.L);
> > + r2(D, E, A, B, C, 32, InternalState.Buffer.L);
> > + r2(C, D, E, A, B, 33, InternalState.Buffer.L);
> > + r2(B, C, D, E, A, 34, InternalState.Buffer.L);
> > + r2(A, B, C, D, E, 35, InternalState.Buffer.L);
> > + r2(E, A, B, C, D, 36, InternalState.Buffer.L);
> > + r2(D, E, A, B, C, 37, InternalState.Buffer.L);
> > + r2(C, D, E, A, B, 38, InternalState.Buffer.L);
> > + r2(B, C, D, E, A, 39, InternalState.Buffer.L);
> > +
> > + r3(A, B, C, D, E, 40, InternalState.Buffer.L);
> > + r3(E, A, B, C, D, 41, InternalState.Buffer.L);
> > + r3(D, E, A, B, C, 42, InternalState.Buffer.L);
> > + r3(C, D, E, A, B, 43, InternalState.Buffer.L);
> > + r3(B, C, D, E, A, 44, InternalState.Buffer.L);
> > + r3(A, B, C, D, E, 45, InternalState.Buffer.L);
> > + r3(E, A, B, C, D, 46, InternalState.Buffer.L);
> > + r3(D, E, A, B, C, 47, InternalState.Buffer.L);
> > + r3(C, D, E, A, B, 48, InternalState.Buffer.L);
> > + r3(B, C, D, E, A, 49, InternalState.Buffer.L);
> > + r3(A, B, C, D, E, 50, InternalState.Buffer.L);
> > + r3(E, A, B, C, D, 51, InternalState.Buffer.L);
> > + r3(D, E, A, B, C, 52, InternalState.Buffer.L);
> > + r3(C, D, E, A, B, 53, InternalState.Buffer.L);
> > + r3(B, C, D, E, A, 54, InternalState.Buffer.L);
> > + r3(A, B, C, D, E, 55, InternalState.Buffer.L);
> > + r3(E, A, B, C, D, 56, InternalState.Buffer.L);
> > + r3(D, E, A, B, C, 57, InternalState.Buffer.L);
> > + r3(C, D, E, A, B, 58, InternalState.Buffer.L);
> > + r3(B, C, D, E, A, 59, InternalState.Buffer.L);
> > +
> > + r4(A, B, C, D, E, 60, InternalState.Buffer.L);
> > + r4(E, A, B, C, D, 61, InternalState.Buffer.L);
> > + r4(D, E, A, B, C, 62, InternalState.Buffer.L);
> > + r4(C, D, E, A, B, 63, InternalState.Buffer.L);
> > + r4(B, C, D, E, A, 64, InternalState.Buffer.L);
> > + r4(A, B, C, D, E, 65, InternalState.Buffer.L);
> > + r4(E, A, B, C, D, 66, InternalState.Buffer.L);
> > + r4(D, E, A, B, C, 67, InternalState.Buffer.L);
> > + r4(C, D, E, A, B, 68, InternalState.Buffer.L);
> > + r4(B, C, D, E, A, 69, InternalState.Buffer.L);
> > + r4(A, B, C, D, E, 70, InternalState.Buffer.L);
> > + r4(E, A, B, C, D, 71, InternalState.Buffer.L);
> > + r4(D, E, A, B, C, 72, InternalState.Buffer.L);
> > + r4(C, D, E, A, B, 73, InternalState.Buffer.L);
> > + r4(B, C, D, E, A, 74, InternalState.Buffer.L);
> > + r4(A, B, C, D, E, 75, InternalState.Buffer.L);
> > + r4(E, A, B, C, D, 76, InternalState.Buffer.L);
> > + r4(D, E, A, B, C, 77, InternalState.Buffer.L);
> > + r4(C, D, E, A, B, 78, InternalState.Buffer.L);
> > + r4(B, C, D, E, A, 79, InternalState.Buffer.L);
> > +
> > + InternalState.State[0] += A;
> > + InternalState.State[1] += B;
> > + InternalState.State[2] += C;
> > + InternalState.State[3] += D;
> > + InternalState.State[4] += E;
> > }
> >
> > void SHA1::addUncounted(uint8_t data) {
> > - uint8_t *const b = (uint8_t *)InternalState.Buffer;
> > #ifdef SHA_BIG_ENDIAN
> > - b[InternalState.BufferOffset] = data;
> > + InternalState.Buffer.C[InternalState.BufferOffset] = data;
> > #else
> > - b[InternalState.BufferOffset ^ 3] = data;
> > + InternalState.Buffer.C[InternalState.BufferOffset ^ 3] = data;
> > #endif
> > +
> > InternalState.BufferOffset++;
> > if (InternalState.BufferOffset == BLOCK_LENGTH) {
> > hashBlock();
> >
> >
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at lists.llvm.org
> > http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
> --
> Davide
>
> "There are no solved problems; there are only problems that are more
> or less solved" -- Henri Poincare
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20161119/64261d7b/attachment.html>
More information about the llvm-commits mailing list