[llvm] 7f05aa2 - [Support/BLAKE3] LLVM-specific changes over the original BLAKE3 C implementation

Argyrios Kyrtzidis via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 24 10:26:53 PDT 2022


Author: Argyrios Kyrtzidis
Date: 2022-03-24T10:26:39-07:00
New Revision: 7f05aa2d4c36d6d53f97ac3e0db30ec600abbc62

URL: https://github.com/llvm/llvm-project/commit/7f05aa2d4c36d6d53f97ac3e0db30ec600abbc62
DIFF: https://github.com/llvm/llvm-project/commit/7f05aa2d4c36d6d53f97ac3e0db30ec600abbc62.diff

LOG: [Support/BLAKE3] LLVM-specific changes over the original BLAKE3 C implementation

Changes from original BLAKE3 sources:

* `blake3.h`:
    * Changes to avoid conflicts if a client also links with its own BLAKE3 version:
        * Renamed the header macro guard with `LLVM_C_` prefix
        * Renamed the C symbols to add the `llvm_` prefix
    * Added a top header comment that references the CC0 license and points to the `LICENSE` file in the repo.
* `blake3_impl.h`: Added `#define`s to remove some of `llvm_` prefixes for the rest of the internal implementation.
* Implementation files:
    * Added a top header comment for `blake3.c`
    * Used `llvm_` prefix for the C public API functions
    * Used `LLVM_LIBRARY_VISIBILITY` for internal implementation functions
    * Added `.private_extern`/`.hidden` in assembly files to reduce visibility of the internal implementation functions
* `README.md`:
    * added a note about where the sources originated from
    * Used the C++ BLAKE3 class and `llvm_` prefixed C API in place of examples and API documentation.
    * Removed instructions about how to build the files.

Added: 
    

Modified: 
    llvm/include/llvm-c/blake3.h
    llvm/include/llvm/Support/BLAKE3.h
    llvm/lib/Support/BLAKE3/README.md
    llvm/lib/Support/BLAKE3/blake3.c
    llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
    llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
    llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
    llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
    llvm/lib/Support/BLAKE3/blake3_dispatch.c
    llvm/lib/Support/BLAKE3/blake3_impl.h
    llvm/lib/Support/BLAKE3/blake3_neon.c
    llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
    llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
    llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
    llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S

Removed: 
    llvm/lib/Support/BLAKE3/blake3.h


################################################################################
diff  --git a/llvm/include/llvm-c/blake3.h b/llvm/include/llvm-c/blake3.h
index 7caf9b4b52995..679477c3aa7f0 100644
--- a/llvm/include/llvm-c/blake3.h
+++ b/llvm/include/llvm-c/blake3.h
@@ -1,5 +1,21 @@
-#ifndef BLAKE3_H
-#define BLAKE3_H
+/*===-- llvm-c/blake3.h - BLAKE3 C Interface ----------------------*- C -*-===*\
+|*                                                                            *|
+|* Released into the public domain with CC0 1.0                               *|
+|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info.                            *|
+|* SPDX-License-Identifier: CC0-1.0                                           *|
+|*                                                                            *|
+|*===----------------------------------------------------------------------===*|
+|*                                                                            *|
+|* This header declares the C interface to LLVM's BLAKE3 implementation.      *|
+|* Original BLAKE3 C API: https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c  *|
+|*                                                                            *|
+|* Symbols are prefixed with 'llvm' to avoid a potential conflict with        *|
+|* another BLAKE3 version within the same program.                            *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
+#ifndef LLVM_C_BLAKE3_H
+#define LLVM_C_BLAKE3_H
 
 #include <stddef.h>
 #include <stdint.h>
@@ -8,53 +24,56 @@
 extern "C" {
 #endif
 
-#define BLAKE3_VERSION_STRING "1.3.1"
-#define BLAKE3_KEY_LEN 32
-#define BLAKE3_OUT_LEN 32
-#define BLAKE3_BLOCK_LEN 64
-#define BLAKE3_CHUNK_LEN 1024
-#define BLAKE3_MAX_DEPTH 54
+#define LLVM_BLAKE3_VERSION_STRING "1.3.1"
+#define LLVM_BLAKE3_KEY_LEN 32
+#define LLVM_BLAKE3_OUT_LEN 32
+#define LLVM_BLAKE3_BLOCK_LEN 64
+#define LLVM_BLAKE3_CHUNK_LEN 1024
+#define LLVM_BLAKE3_MAX_DEPTH 54
 
 // This struct is a private implementation detail. It has to be here because
-// it's part of blake3_hasher below.
+// it's part of llvm_blake3_hasher below.
 typedef struct {
   uint32_t cv[8];
   uint64_t chunk_counter;
-  uint8_t buf[BLAKE3_BLOCK_LEN];
+  uint8_t buf[LLVM_BLAKE3_BLOCK_LEN];
   uint8_t buf_len;
   uint8_t blocks_compressed;
   uint8_t flags;
-} blake3_chunk_state;
+} llvm_blake3_chunk_state;
 
 typedef struct {
   uint32_t key[8];
-  blake3_chunk_state chunk;
+  llvm_blake3_chunk_state chunk;
   uint8_t cv_stack_len;
   // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
   // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
   // requires a 4th entry, rather than merging everything down to 1, because we
   // don't know whether more input is coming. This is different from how the
   // reference implementation does things.
-  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
-} blake3_hasher;
-
-const char *blake3_version(void);
-void blake3_hasher_init(blake3_hasher *self);
-void blake3_hasher_init_keyed(blake3_hasher *self,
-                              const uint8_t key[BLAKE3_KEY_LEN]);
-void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
-void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
-                                       size_t context_len);
-void blake3_hasher_update(blake3_hasher *self, const void *input,
-                          size_t input_len);
-void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
-                            size_t out_len);
-void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
-                                 uint8_t *out, size_t out_len);
-void blake3_hasher_reset(blake3_hasher *self);
+  uint8_t cv_stack[(LLVM_BLAKE3_MAX_DEPTH + 1) * LLVM_BLAKE3_OUT_LEN];
+} llvm_blake3_hasher;
+
+const char *llvm_blake3_version(void);
+void llvm_blake3_hasher_init(llvm_blake3_hasher *self);
+void llvm_blake3_hasher_init_keyed(llvm_blake3_hasher *self,
+                                   const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
+void llvm_blake3_hasher_init_derive_key(llvm_blake3_hasher *self,
+                                        const char *context);
+void llvm_blake3_hasher_init_derive_key_raw(llvm_blake3_hasher *self,
+                                            const void *context,
+                                            size_t context_len);
+void llvm_blake3_hasher_update(llvm_blake3_hasher *self, const void *input,
+                               size_t input_len);
+void llvm_blake3_hasher_finalize(const llvm_blake3_hasher *self, uint8_t *out,
+                                 size_t out_len);
+void llvm_blake3_hasher_finalize_seek(const llvm_blake3_hasher *self,
+                                      uint64_t seek, uint8_t *out,
+                                      size_t out_len);
+void llvm_blake3_hasher_reset(llvm_blake3_hasher *self);
 
 #ifdef __cplusplus
 }
 #endif
 
-#endif /* BLAKE3_H */
+#endif /* LLVM_C_BLAKE3_H */

diff  --git a/llvm/include/llvm/Support/BLAKE3.h b/llvm/include/llvm/Support/BLAKE3.h
index fb18b279078d5..ade5201904655 100644
--- a/llvm/include/llvm/Support/BLAKE3.h
+++ b/llvm/include/llvm/Support/BLAKE3.h
@@ -19,7 +19,7 @@
 
 namespace llvm {
 
-/// The constant \p BLAKE3_OUT_LEN provides the default output length,
+/// The constant \p LLVM_BLAKE3_OUT_LEN provides the default output length,
 /// 32 bytes, which is recommended for most callers.
 ///
 /// Outputs shorter than the default length of 32 bytes (256 bits) provide
@@ -31,7 +31,7 @@ namespace llvm {
 /// Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly
 /// requesting a short output is equivalent to truncating the default-length
 /// output.
-template <size_t NumBytes = BLAKE3_OUT_LEN>
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
 using BLAKE3Result = std::array<uint8_t, NumBytes>;
 
 /// A class that wrap the BLAKE3 algorithm.
@@ -40,37 +40,38 @@ class BLAKE3 {
   BLAKE3() { init(); }
 
   /// Reinitialize the internal state
-  void init() { blake3_hasher_init(&Hasher); }
+  void init() { llvm_blake3_hasher_init(&Hasher); }
 
   /// Digest more data.
   void update(ArrayRef<uint8_t> Data) {
-    blake3_hasher_update(&Hasher, Data.data(), Data.size());
+    llvm_blake3_hasher_update(&Hasher, Data.data(), Data.size());
   }
 
   /// Digest more data.
   void update(StringRef Str) {
-    blake3_hasher_update(&Hasher, Str.data(), Str.size());
+    llvm_blake3_hasher_update(&Hasher, Str.data(), Str.size());
   }
 
   /// Finalize the hasher and put the result in \p Result.
   /// This doesn't modify the hasher itself, and it's possible to finalize again
   /// after adding more input.
-  template <size_t NumBytes = BLAKE3_OUT_LEN>
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
   void final(BLAKE3Result<NumBytes> &Result) {
-    blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
+    llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
   }
 
   /// Finalize the hasher and return an output of any length, given in bytes.
   /// This doesn't modify the hasher itself, and it's possible to finalize again
   /// after adding more input.
-  template <size_t NumBytes = BLAKE3_OUT_LEN> BLAKE3Result<NumBytes> final() {
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+  BLAKE3Result<NumBytes> final() {
     BLAKE3Result<NumBytes> Result;
-    blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
+    llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
     return Result;
   }
 
   /// Returns a BLAKE3 hash for the given data.
-  template <size_t NumBytes = BLAKE3_OUT_LEN>
+  template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
   static BLAKE3Result<NumBytes> hash(ArrayRef<uint8_t> Data) {
     BLAKE3 Hasher;
     Hasher.update(Data);
@@ -78,7 +79,7 @@ class BLAKE3 {
   }
 
 private:
-  blake3_hasher Hasher;
+  llvm_blake3_hasher Hasher;
 };
 
 } // namespace llvm

diff  --git a/llvm/lib/Support/BLAKE3/README.md b/llvm/lib/Support/BLAKE3/README.md
index 5e35fe0fded3d..c4a87c7806410 100644
--- a/llvm/lib/Support/BLAKE3/README.md
+++ b/llvm/lib/Support/BLAKE3/README.md
@@ -1,12 +1,14 @@
-The official C implementation of BLAKE3.
+Implementation of BLAKE3, originating from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c
 
 # Example
 
 An example program that hashes bytes from standard input and prints the
 result:
 
-```c
-#include "blake3.h"
+Using the C++ API:
+
+```c++
+#include "llvm/Support/BLAKE3.h"
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -15,15 +17,14 @@ result:
 
 int main() {
   // Initialize the hasher.
-  blake3_hasher hasher;
-  blake3_hasher_init(&hasher);
+  llvm::BLAKE3 hasher;
 
   // Read input bytes from stdin.
-  unsigned char buf[65536];
+  char buf[65536];
   while (1) {
     ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
     if (n > 0) {
-      blake3_hasher_update(&hasher, buf, n);
+      hasher.update(llvm::StringRef(buf, n));
     } else if (n == 0) {
       break; // end of file
     } else {
@@ -32,42 +33,80 @@ int main() {
     }
   }
 
-  // Finalize the hash. BLAKE3_OUT_LEN is the default output length, 32 bytes.
-  uint8_t output[BLAKE3_OUT_LEN];
-  blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN);
+  // Finalize the hash. Default output length is 32 bytes.
+  auto output = hasher.final();
 
   // Print the hash as hexadecimal.
-  for (size_t i = 0; i < BLAKE3_OUT_LEN; i++) {
-    printf("%02x", output[i]);
+  for (uint8_t byte : output) {
+    printf("%02x", byte);
   }
   printf("\n");
   return 0;
 }
 ```
 
-The code above is included in this directory as `example.c`. If you're
-on x86\_64 with a Unix-like OS, you can compile a working binary like
-this:
+Using the C API:
+
+```c
+#include "llvm-c/blake3.h"
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+int main() {
+  // Initialize the hasher.
+  llvm_blake3_hasher hasher;
+  llvm_blake3_hasher_init(&hasher);
 
-```bash
-gcc -O3 -o example example.c blake3.c blake3_dispatch.c blake3_portable.c \
-    blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S \
-    blake3_avx512_x86-64_unix.S
+  // Read input bytes from stdin.
+  unsigned char buf[65536];
+  while (1) {
+    ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
+    if (n > 0) {
+      llvm_blake3_hasher_update(&hasher, buf, n);
+    } else if (n == 0) {
+      break; // end of file
+    } else {
+      fprintf(stderr, "read failed: %s\n", strerror(errno));
+      exit(1);
+    }
+  }
+
+  // Finalize the hash. LLVM_BLAKE3_OUT_LEN is the default output length, 32 bytes.
+  uint8_t output[LLVM_BLAKE3_OUT_LEN];
+  llvm_blake3_hasher_finalize(&hasher, output, LLVM_BLAKE3_OUT_LEN);
+
+  // Print the hash as hexadecimal.
+  for (size_t i = 0; i < LLVM_BLAKE3_OUT_LEN; i++) {
+    printf("%02x", output[i]);
+  }
+  printf("\n");
+  return 0;
+}
 ```
 
 # API
 
-## The Struct
+## The Class/Struct
 
+```c++
+class BLAKE3 {
+  // API
+private:
+  llvm_blake3_hasher Hasher;
+};
+```
 ```c
 typedef struct {
   // private fields
-} blake3_hasher;
+} llvm_blake3_hasher;
 ```
 
 An incremental BLAKE3 hashing state, which can accept any number of
 updates. This implementation doesn't allocate any heap memory, but
-`sizeof(blake3_hasher)` itself is relatively large, currently 1912 bytes
+`sizeof(llvm_blake3_hasher)` itself is relatively large, currently 1912 bytes
 on x86-64. This size can be reduced by restricting the maximum input
 length, as described in Section 5.4 of [the BLAKE3
 spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf),
@@ -75,18 +114,28 @@ but this implementation doesn't currently support that strategy.
 
 ## Common API Functions
 
+```c++
+BLAKE3::BLAKE3();
+
+void BLAKE3::init();
+```
 ```c
-void blake3_hasher_init(
-  blake3_hasher *self);
+void llvm_blake3_hasher_init(
+  llvm_blake3_hasher *self);
 ```
 
-Initialize a `blake3_hasher` in the default hashing mode.
+Initialize a `llvm_blake3_hasher` in the default hashing mode.
 
 ---
 
+```c++
+void BLAKE3::update(ArrayRef<uint8_t> Data);
+
+void BLAKE3::update(StringRef Str);
+```
 ```c
-void blake3_hasher_update(
-  blake3_hasher *self,
+void llvm_blake3_hasher_update(
+  llvm_blake3_hasher *self,
   const void *input,
   size_t input_len);
 ```
@@ -95,16 +144,26 @@ Add input to the hasher. This can be called any number of times.
 
 ---
 
+```c++
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+using BLAKE3Result = std::array<uint8_t, NumBytes>;
+
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+void BLAKE3::final(BLAKE3Result<NumBytes> &Result);
+
+template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
+BLAKE3Result<NumBytes> final();
+```
 ```c
-void blake3_hasher_finalize(
-  const blake3_hasher *self,
+void llvm_blake3_hasher_finalize(
+  const llvm_blake3_hasher *self,
   uint8_t *out,
   size_t out_len);
 ```
 
 Finalize the hasher and return an output of any length, given in bytes.
 This doesn't modify the hasher itself, and it's possible to finalize
-again after adding more input. The constant `BLAKE3_OUT_LEN` provides
+again after adding more input. The constant `LLVM_BLAKE3_OUT_LEN` provides
 the default output length, 32 bytes, which is recommended for most
 callers.
 
@@ -121,25 +180,25 @@ output. (Note that this is different between BLAKE2 and BLAKE3.)
 ## Less Common API Functions
 
 ```c
-void blake3_hasher_init_keyed(
-  blake3_hasher *self,
-  const uint8_t key[BLAKE3_KEY_LEN]);
+void llvm_blake3_hasher_init_keyed(
+  llvm_blake3_hasher *self,
+  const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
 ```
 
-Initialize a `blake3_hasher` in the keyed hashing mode. The key must be
+Initialize a `llvm_blake3_hasher` in the keyed hashing mode. The key must be
 exactly 32 bytes.
 
 ---
 
 ```c
-void blake3_hasher_init_derive_key(
-  blake3_hasher *self,
+void llvm_blake3_hasher_init_derive_key(
+  llvm_blake3_hasher *self,
   const char *context);
 ```
 
-Initialize a `blake3_hasher` in the key derivation mode. The context
+Initialize a `llvm_blake3_hasher` in the key derivation mode. The context
 string is given as an initialization parameter, and afterwards input key
-material should be given with `blake3_hasher_update`. The context string
+material should be given with `llvm_blake3_hasher_update`. The context string
 is a null-terminated C string which should be **hardcoded, globally
 unique, and application-specific**. The context string should not
 include any dynamic input like salts, nonces, or identifiers read from a
@@ -148,24 +207,24 @@ database at runtime. A good default format for the context string is
 2019-12-25 16:18:03 session tokens v1"`.
 
 This function is intended for application code written in C. For
-language bindings, see `blake3_hasher_init_derive_key_raw` below.
+language bindings, see `llvm_blake3_hasher_init_derive_key_raw` below.
 
 ---
 
 ```c
-void blake3_hasher_init_derive_key_raw(
-  blake3_hasher *self,
+void llvm_blake3_hasher_init_derive_key_raw(
+  llvm_blake3_hasher *self,
   const void *context,
   size_t context_len);
 ```
 
-As `blake3_hasher_init_derive_key` above, except that the context string
+As `llvm_blake3_hasher_init_derive_key` above, except that the context string
 is given as a pointer to an array of arbitrary bytes with a provided
 length. This is intended for writing language bindings, where C string
 conversion would add unnecessary overhead and new error cases. Unicode
 strings should be encoded as UTF-8.
 
-Application code in C should prefer `blake3_hasher_init_derive_key`,
+Application code in C should prefer `llvm_blake3_hasher_init_derive_key`,
 which takes the context as a C string. If you need to use arbitrary
 bytes as a context string in application code, consider whether you're
 violating the requirement that context strings should be hardcoded.
@@ -173,14 +232,14 @@ violating the requirement that context strings should be hardcoded.
 ---
 
 ```c
-void blake3_hasher_finalize_seek(
-  const blake3_hasher *self,
+void llvm_blake3_hasher_finalize_seek(
+  const llvm_blake3_hasher *self,
   uint64_t seek,
   uint8_t *out,
   size_t out_len);
 ```
 
-The same as `blake3_hasher_finalize`, but with an additional `seek`
+The same as `llvm_blake3_hasher_finalize`, but with an additional `seek`
 parameter for the starting byte position in the output stream. To
 efficiently stream a large output without allocating memory, call this
 function in a loop, incrementing `seek` by the output length each time.
@@ -188,26 +247,20 @@ function in a loop, incrementing `seek` by the output length each time.
 ---
 
 ```c
-void blake3_hasher_reset(
-  blake3_hasher *self);
+void llvm_blake3_hasher_reset(
+  llvm_blake3_hasher *self);
 ```
 
 Reset the hasher to its initial state, prior to any calls to
-`blake3_hasher_update`. Currently this is no different from calling
-`blake3_hasher_init` or similar again. However, if this implementation gains
-multithreading support in the future, and if `blake3_hasher` holds (optional)
-threading resources, this function will reuse those resources. Until then, this
-is mainly for feature compatibility with the Rust implementation.
+`llvm_blake3_hasher_update`. Currently this is no different from calling
+`llvm_blake3_hasher_init` or similar again. However, if this implementation gains
+multithreading support in the future, and if `llvm_blake3_hasher` holds (optional)
+threading resources, this function will reuse those resources.
 
 
 # Building
 
-This implementation is just C and assembly files. It doesn't include a
-public-facing build system. (The `Makefile` in this directory is only
-for testing.) Instead, the intention is that you can include these files
-in whatever build system you're already using. This section describes
-the commands your build system should execute, or which you can execute
-by hand. Note that these steps may change in future versions.
+This implementation is just C and assembly files.
 
 ## x86
 
@@ -225,92 +278,19 @@ different compilers, and they build more quickly. On the other hand, the
 assembly versions are x86\_64-only, and you need to select the right
 flavor for your target platform.
 
-Here's an example of building a shared library on x86\_64 Linux using
-the assembly implementations:
-
-```bash
-gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
-    blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S \
-    blake3_avx512_x86-64_unix.S
-```
-
-When building the intrinsics-based implementations, you need to build
-each implementation separately, with the corresponding instruction set
-explicitly enabled in the compiler. Here's the same shared library using
-the intrinsics-based implementations:
-
-```bash
-gcc -c -fPIC -O3 -msse2 blake3_sse2.c -o blake3_sse2.o
-gcc -c -fPIC -O3 -msse4.1 blake3_sse41.c -o blake3_sse41.o
-gcc -c -fPIC -O3 -mavx2 blake3_avx2.c -o blake3_avx2.o
-gcc -c -fPIC -O3 -mavx512f -mavx512vl blake3_avx512.c -o blake3_avx512.o
-gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
-    blake3_avx2.o blake3_avx512.o blake3_sse41.o blake3_sse2.o
-```
-
-Note above that building `blake3_avx512.c` requires both `-mavx512f` and
-`-mavx512vl` under GCC and Clang. Under MSVC, the single `/arch:AVX512`
-flag is sufficient. The MSVC equivalent of `-mavx2` is `/arch:AVX2`.
-MSVC enables SSE2 and SSE4.1 by defaut, and it doesn't have a
-corresponding flag.
-
-If you want to omit SIMD code entirely, you need to explicitly disable
-each instruction set. Here's an example of building a shared library on
-x86 with only portable code:
-
-```bash
-gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 \
-    -DBLAKE3_NO_AVX512 blake3.c blake3_dispatch.c blake3_portable.c
-```
-
 ## ARM NEON
 
 The NEON implementation is enabled by default on AArch64, but not on
 other ARM targets, since not all of them support it. To enable it, set
-`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
-ARM Linux with NEON support:
-
-```bash
-gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
-    blake3_portable.c blake3_neon.c
-```
+`BLAKE3_USE_NEON=1`.
 
 To explicitiy disable using NEON instructions on AArch64, set
 `BLAKE3_USE_NEON=0`.
 
-```bash
-gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
-    blake3_portable.c 
-```
-
-Note that on some targets (ARMv7 in particular), extra flags may be
-required to activate NEON support in the compiler. If you see an error
-like...
-
-```
-/usr/lib/gcc/armv7l-unknown-linux-gnueabihf/9.2.0/include/arm_neon.h:635:1: error: inlining failed
-in call to always_inline ‘vaddq_u32’: target specific option mismatch
-```
-
-...then you may need to add something like `-mfpu=neon-vfpv4
--mfloat-abi=hard`.
-
 ## Other Platforms
 
-The portable implementation should work on most other architectures. For
-example:
-
-```bash
-gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c
-```
+The portable implementation should work on most other architectures.
 
 # Multithreading
 
-Unlike the Rust implementation, the C implementation doesn't currently support
-multithreading. A future version of this library could add support by taking an
-optional dependency on OpenMP or similar. Alternatively, we could expose a
-lower-level API to allow callers to implement concurrency themselves. The
-former would be more convenient and less error-prone, but the latter would give
-callers the maximum possible amount of control. The best choice here depends on
-the specific use case, so if you have a use case for multithreaded hashing in
-C, please file a GitHub issue and let us know.
+The implementation doesn't currently support multithreading.

diff  --git a/llvm/lib/Support/BLAKE3/blake3.c b/llvm/lib/Support/BLAKE3/blake3.c
index 1239433c65761..d66771b90f5f9 100644
--- a/llvm/lib/Support/BLAKE3/blake3.c
+++ b/llvm/lib/Support/BLAKE3/blake3.c
@@ -1,11 +1,18 @@
+/*===-- blake3.c - BLAKE3 C Implementation ------------------------*- C -*-===*\
+|*                                                                            *|
+|* Released into the public domain with CC0 1.0                               *|
+|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info.                            *|
+|* SPDX-License-Identifier: CC0-1.0                                           *|
+|*                                                                            *|
+\*===----------------------------------------------------------------------===*/
+
 #include <assert.h>
 #include <stdbool.h>
 #include <string.h>
 
-#include "blake3.h"
 #include "blake3_impl.h"
 
-const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
+const char *llvm_blake3_version(void) { return BLAKE3_VERSION_STRING; }
 
 INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
                              uint8_t flags) {
@@ -366,29 +373,29 @@ INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
   self->cv_stack_len = 0;
 }
 
-void blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
+void llvm_blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
 
-void blake3_hasher_init_keyed(blake3_hasher *self,
+void llvm_blake3_hasher_init_keyed(blake3_hasher *self,
                               const uint8_t key[BLAKE3_KEY_LEN]) {
   uint32_t key_words[8];
   load_key_words(key, key_words);
   hasher_init_base(self, key_words, KEYED_HASH);
 }
 
-void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
+void llvm_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
                                        size_t context_len) {
   blake3_hasher context_hasher;
   hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT);
-  blake3_hasher_update(&context_hasher, context, context_len);
+  llvm_blake3_hasher_update(&context_hasher, context, context_len);
   uint8_t context_key[BLAKE3_KEY_LEN];
-  blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
+  llvm_blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
   uint32_t context_key_words[8];
   load_key_words(context_key, context_key_words);
   hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL);
 }
 
-void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
-  blake3_hasher_init_derive_key_raw(self, context, strlen(context));
+void llvm_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
+  llvm_blake3_hasher_init_derive_key_raw(self, context, strlen(context));
 }
 
 // As described in hasher_push_cv() below, we do "lazy merging", delaying
@@ -452,7 +459,7 @@ INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
   self->cv_stack_len += 1;
 }
 
-void blake3_hasher_update(blake3_hasher *self, const void *input,
+void llvm_blake3_hasher_update(blake3_hasher *self, const void *input,
                           size_t input_len) {
   // Explicitly checking for zero avoids causing UB by passing a null pointer
   // to memcpy. This comes up in practice with things like:
@@ -561,12 +568,12 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
   }
 }
 
-void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
+void llvm_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
                             size_t out_len) {
-  blake3_hasher_finalize_seek(self, 0, out, out_len);
+  llvm_blake3_hasher_finalize_seek(self, 0, out, out_len);
 }
 
-void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
+void llvm_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
                                  uint8_t *out, size_t out_len) {
   // Explicitly checking for zero avoids causing UB by passing a null pointer
   // to memcpy. This comes up in practice with things like:
@@ -610,7 +617,7 @@ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
   output_root_bytes(&output, seek, out, out_len);
 }
 
-void blake3_hasher_reset(blake3_hasher *self) {
+void llvm_blake3_hasher_reset(blake3_hasher *self) {
   chunk_state_reset(&self->chunk, self->key, 0);
   self->cv_stack_len = 0;
 }

diff  --git a/llvm/lib/Support/BLAKE3/blake3.h b/llvm/lib/Support/BLAKE3/blake3.h
deleted file mode 100644
index 7caf9b4b52995..0000000000000
--- a/llvm/lib/Support/BLAKE3/blake3.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef BLAKE3_H
-#define BLAKE3_H
-
-#include <stddef.h>
-#include <stdint.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define BLAKE3_VERSION_STRING "1.3.1"
-#define BLAKE3_KEY_LEN 32
-#define BLAKE3_OUT_LEN 32
-#define BLAKE3_BLOCK_LEN 64
-#define BLAKE3_CHUNK_LEN 1024
-#define BLAKE3_MAX_DEPTH 54
-
-// This struct is a private implementation detail. It has to be here because
-// it's part of blake3_hasher below.
-typedef struct {
-  uint32_t cv[8];
-  uint64_t chunk_counter;
-  uint8_t buf[BLAKE3_BLOCK_LEN];
-  uint8_t buf_len;
-  uint8_t blocks_compressed;
-  uint8_t flags;
-} blake3_chunk_state;
-
-typedef struct {
-  uint32_t key[8];
-  blake3_chunk_state chunk;
-  uint8_t cv_stack_len;
-  // The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
-  // with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
-  // requires a 4th entry, rather than merging everything down to 1, because we
-  // don't know whether more input is coming. This is different from how the
-  // reference implementation does things.
-  uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
-} blake3_hasher;
-
-const char *blake3_version(void);
-void blake3_hasher_init(blake3_hasher *self);
-void blake3_hasher_init_keyed(blake3_hasher *self,
-                              const uint8_t key[BLAKE3_KEY_LEN]);
-void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
-void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
-                                       size_t context_len);
-void blake3_hasher_update(blake3_hasher *self, const void *input,
-                          size_t input_len);
-void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
-                            size_t out_len);
-void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
-                                 uint8_t *out, size_t out_len);
-void blake3_hasher_reset(blake3_hasher *self);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* BLAKE3_H */

diff  --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
index 812bb8568295a..9e7962a809702 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_unix.S
@@ -12,7 +12,15 @@
 #define _CET_ENDBR
 #endif
 
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
 .intel_syntax noprefix
+HIDDEN _blake3_hash_many_avx2
+HIDDEN blake3_hash_many_avx2
 .global _blake3_hash_many_avx2
 .global blake3_hash_many_avx2
 #ifdef __APPLE__

diff  --git a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
index bb58d2ae64b13..ef19edbbed4ea 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx2_x86-64_windows_gnu.S
@@ -1,4 +1,6 @@
 .intel_syntax noprefix
+.hidden _blake3_hash_many_avx2
+.hidden blake3_hash_many_avx2
 .global _blake3_hash_many_avx2
 .global blake3_hash_many_avx2
 .section .text

diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
index a06aede0f1a91..b2376b986b6b5 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_unix.S
@@ -12,7 +12,19 @@
 #define _CET_ENDBR
 #endif
 
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
 .intel_syntax noprefix
+HIDDEN _blake3_hash_many_avx512
+HIDDEN blake3_hash_many_avx512
+HIDDEN blake3_compress_in_place_avx512
+HIDDEN _blake3_compress_in_place_avx512
+HIDDEN blake3_compress_xof_avx512
+HIDDEN _blake3_compress_xof_avx512
 .global _blake3_hash_many_avx512
 .global blake3_hash_many_avx512
 .global blake3_compress_in_place_avx512

diff --git a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
index e10b9f36cbccb..6d17cbdb0c876 100644
--- a/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_avx512_x86-64_windows_gnu.S
@@ -1,5 +1,11 @@
 .intel_syntax noprefix
 
+.hidden _blake3_hash_many_avx512
+.hidden blake3_hash_many_avx512
+.hidden blake3_compress_in_place_avx512
+.hidden _blake3_compress_in_place_avx512
+.hidden blake3_compress_xof_avx512
+.hidden _blake3_compress_xof_avx512
 .global _blake3_hash_many_avx512
 .global blake3_hash_many_avx512
 .global blake3_compress_in_place_avx512

diff --git a/llvm/lib/Support/BLAKE3/blake3_dispatch.c b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
index b49805897a0fe..072dab8f1aa9b 100644
--- a/llvm/lib/Support/BLAKE3/blake3_dispatch.c
+++ b/llvm/lib/Support/BLAKE3/blake3_dispatch.c
@@ -78,6 +78,7 @@ static /* Allow the variable to be controlled manually for testing */
 #endif
     enum cpu_feature g_cpu_features = UNDEFINED;
 
+LLVM_ATTRIBUTE_USED
 #if !defined(BLAKE3_TESTING)
 static
 #endif

diff --git a/llvm/lib/Support/BLAKE3/blake3_impl.h b/llvm/lib/Support/BLAKE3/blake3_impl.h
index cc5672f2274be..180d0a6eeda8d 100644
--- a/llvm/lib/Support/BLAKE3/blake3_impl.h
+++ b/llvm/lib/Support/BLAKE3/blake3_impl.h
@@ -7,7 +7,19 @@
 #include <stdint.h>
 #include <string.h>
 
-#include "blake3.h"
+#include "llvm-c/blake3.h"
+// For \p LLVM_LIBRARY_VISIBILITY
+#include "llvm/Support/Compiler.h"
+
+// Remove the 'llvm_' prefix for the rest of the internal implementation.
+#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
+#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
+#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
+#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
+#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
+#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
+#define blake3_hasher llvm_blake3_hasher
+#define blake3_chunk_state llvm_blake3_chunk_state
 
 // internal flags
 enum blake3_flags {
@@ -178,35 +190,42 @@ INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
   store32(&bytes_out[7 * 4], cv_words[7]);
 }
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_in_place(uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],
                               uint8_t block_len, uint64_t counter,
                               uint8_t flags);
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_xof(const uint32_t cv[8],
                          const uint8_t block[BLAKE3_BLOCK_LEN],
                          uint8_t block_len, uint64_t counter, uint8_t flags,
                          uint8_t out[64]);
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
                       size_t blocks, const uint32_t key[8], uint64_t counter,
                       bool increment_counter, uint8_t flags,
                       uint8_t flags_start, uint8_t flags_end, uint8_t *out);
 
+LLVM_LIBRARY_VISIBILITY
 size_t blake3_simd_degree(void);
 
 
 // Declarations for implementation-specific functions.
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_in_place_portable(uint32_t cv[8],
                                        const uint8_t block[BLAKE3_BLOCK_LEN],
                                        uint8_t block_len, uint64_t counter,
                                        uint8_t flags);
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_xof_portable(const uint32_t cv[8],
                                   const uint8_t block[BLAKE3_BLOCK_LEN],
                                   uint8_t block_len, uint64_t counter,
                                   uint8_t flags, uint8_t out[64]);
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
                                size_t blocks, const uint32_t key[8],
                                uint64_t counter, bool increment_counter,
@@ -215,14 +234,17 @@ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
 
 #if defined(IS_X86)
 #if !defined(BLAKE3_NO_SSE2)
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_in_place_sse2(uint32_t cv[8],
                                    const uint8_t block[BLAKE3_BLOCK_LEN],
                                    uint8_t block_len, uint64_t counter,
                                    uint8_t flags);
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_xof_sse2(const uint32_t cv[8],
                               const uint8_t block[BLAKE3_BLOCK_LEN],
                               uint8_t block_len, uint64_t counter,
                               uint8_t flags, uint8_t out[64]);
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,
@@ -230,14 +252,17 @@ void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
                            uint8_t flags_end, uint8_t *out);
 #endif
 #if !defined(BLAKE3_NO_SSE41)
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_in_place_sse41(uint32_t cv[8],
                                     const uint8_t block[BLAKE3_BLOCK_LEN],
                                     uint8_t block_len, uint64_t counter,
                                     uint8_t flags);
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_xof_sse41(const uint32_t cv[8],
                                const uint8_t block[BLAKE3_BLOCK_LEN],
                                uint8_t block_len, uint64_t counter,
                                uint8_t flags, uint8_t out[64]);
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
                             size_t blocks, const uint32_t key[8],
                             uint64_t counter, bool increment_counter,
@@ -245,6 +270,7 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
                             uint8_t flags_end, uint8_t *out);
 #endif
 #if !defined(BLAKE3_NO_AVX2)
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,
@@ -252,16 +278,19 @@ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
                            uint8_t flags_end, uint8_t *out);
 #endif
 #if !defined(BLAKE3_NO_AVX512)
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_in_place_avx512(uint32_t cv[8],
                                      const uint8_t block[BLAKE3_BLOCK_LEN],
                                      uint8_t block_len, uint64_t counter,
                                      uint8_t flags);
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_compress_xof_avx512(const uint32_t cv[8],
                                 const uint8_t block[BLAKE3_BLOCK_LEN],
                                 uint8_t block_len, uint64_t counter,
                                 uint8_t flags, uint8_t out[64]);
 
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
                              size_t blocks, const uint32_t key[8],
                              uint64_t counter, bool increment_counter,
@@ -271,6 +300,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
 #endif
 
 #if BLAKE3_USE_NEON == 1
+LLVM_LIBRARY_VISIBILITY
 void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
                            size_t blocks, const uint32_t key[8],
                            uint64_t counter, bool increment_counter,

diff --git a/llvm/lib/Support/BLAKE3/blake3_neon.c b/llvm/lib/Support/BLAKE3/blake3_neon.c
index a6f6da921e197..51fb7473d83a6 100644
--- a/llvm/lib/Support/BLAKE3/blake3_neon.c
+++ b/llvm/lib/Support/BLAKE3/blake3_neon.c
@@ -228,6 +228,7 @@ INLINE void load_counters4(uint64_t counter, bool increment_counter,
       counter_high(counter + (mask & 2)), counter_high(counter + (mask & 3)));
 }
 
+static
 void blake3_hash4_neon(const uint8_t *const *inputs, size_t blocks,
                        const uint32_t key[8], uint64_t counter,
                        bool increment_counter, uint8_t flags,

diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
index 99f033fefb41d..c30bd7372110b 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_unix.S
@@ -12,7 +12,19 @@
 #define _CET_ENDBR
 #endif
 
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
 .intel_syntax noprefix
+HIDDEN blake3_hash_many_sse2
+HIDDEN _blake3_hash_many_sse2
+HIDDEN blake3_compress_in_place_sse2
+HIDDEN _blake3_compress_in_place_sse2
+HIDDEN blake3_compress_xof_sse2
+HIDDEN _blake3_compress_xof_sse2
 .global blake3_hash_many_sse2
 .global _blake3_hash_many_sse2
 .global blake3_compress_in_place_sse2

diff --git a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
index 8852ba5976e15..66470fe067806 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse2_x86-64_windows_gnu.S
@@ -1,4 +1,10 @@
 .intel_syntax noprefix
+.hidden blake3_hash_many_sse2
+.hidden _blake3_hash_many_sse2
+.hidden blake3_compress_in_place_sse2
+.hidden _blake3_compress_in_place_sse2
+.hidden blake3_compress_xof_sse2
+.hidden _blake3_compress_xof_sse2
 .global blake3_hash_many_sse2
 .global _blake3_hash_many_sse2
 .global blake3_compress_in_place_sse2

diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
index a3ff64269caab..b3f35eee57aca 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_unix.S
@@ -12,7 +12,19 @@
 #define _CET_ENDBR
 #endif
 
+#ifdef __APPLE__
+#define HIDDEN .private_extern
+#else
+#define HIDDEN .hidden
+#endif
+
 .intel_syntax noprefix
+HIDDEN blake3_hash_many_sse41
+HIDDEN _blake3_hash_many_sse41
+HIDDEN blake3_compress_in_place_sse41
+HIDDEN _blake3_compress_in_place_sse41
+HIDDEN blake3_compress_xof_sse41
+HIDDEN _blake3_compress_xof_sse41
 .global blake3_hash_many_sse41
 .global _blake3_hash_many_sse41
 .global blake3_compress_in_place_sse41

diff --git a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
index 60d0a4042e71d..749a84d7f6faa 100644
--- a/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
+++ b/llvm/lib/Support/BLAKE3/blake3_sse41_x86-64_windows_gnu.S
@@ -1,4 +1,10 @@
 .intel_syntax noprefix
+.hidden blake3_hash_many_sse41
+.hidden _blake3_hash_many_sse41
+.hidden blake3_compress_in_place_sse41
+.hidden _blake3_compress_in_place_sse41
+.hidden blake3_compress_xof_sse41
+.hidden _blake3_compress_xof_sse41
 .global blake3_hash_many_sse41
 .global _blake3_hash_many_sse41
 .global blake3_compress_in_place_sse41


        


More information about the llvm-commits mailing list