[libc] [llvm] Add vector-based strlen implementation for x86_64 and aarch64 (PR #152389)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 6 14:19:52 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-libc
Author: None (Sterling-Augustine)
<details>
<summary>Changes</summary>
These replace the default LIBC_CONF_STRING_UNSAFE_WIDE_READ implementation
on x86_64 and aarch64.
These are substantially faster than both the character-by-character
implementation and the original unsafe_wide_read implementation. Some below
I have been unable to performance-test the aarch64 version, but I suspect
speedups similar to avx2.
```
Function: strlen
Variant:
char wide ull sse2 avx2 avx512
=============================================================================================================================================================
length=1, alignment=1: 13.18 20.47 (-55.24%) 20.21 (-53.27%) 32.50 (-146.54%) 26.05 (-97.61%) 18.03 (-36.74%)
length=1, alignment=0: 12.80 34.92 (-172.89%) 20.01 (-56.39%) 17.52 (-36.86%) 17.78 (-38.92%) 18.04 (-40.94%)
length=2, alignment=2: 9.91 19.02 (-91.95%) 12.64 (-27.52%) 11.06 (-11.59%) 9.48 ( 4.38%) 9.48 ( 4.34%)
length=2, alignment=0: 9.56 26.88 (-181.24%) 12.64 (-32.31%) 11.06 (-15.73%) 11.06 (-15.72%) 11.83 (-23.80%)
length=3, alignment=3: 8.31 10.45 (-25.84%) 8.28 ( 0.32%) 8.28 ( 0.36%) 6.21 ( 25.28%) 6.21 ( 25.24%)
length=3, alignment=0: 8.39 14.53 (-73.20%) 8.28 ( 1.33%) 7.24 ( 13.69%) 7.56 ( 9.94%) 7.25 ( 13.65%)
length=4, alignment=4: 9.84 21.76 (-121.24%) 15.55 (-58.11%) 6.57 ( 33.18%) 5.02 ( 48.98%) 6.00 ( 39.00%)
length=4, alignment=0: 8.64 13.70 (-58.51%) 7.28 ( 15.73%) 6.37 ( 26.31%) 6.36 ( 26.36%) 6.36 ( 26.36%)
length=5, alignment=5: 11.85 23.81 (-100.97%) 12.17 ( -2.67%) 5.68 ( 52.09%) 4.87 ( 58.94%) 6.48 ( 45.33%)
length=5, alignment=0: 11.82 13.64 (-15.42%) 7.27 ( 38.45%) 6.36 ( 46.15%) 6.37 ( 46.11%) 6.36 ( 46.14%)
length=6, alignment=6: 10.50 19.37 (-84.56%) 13.64 (-29.93%) 6.54 ( 37.71%) 6.89 ( 34.35%) 9.45 ( 10.01%)
length=6, alignment=0: 14.96 14.05 ( 6.04%) 6.49 ( 56.62%) 5.68 ( 62.04%) 5.68 ( 62.04%) 13.15 ( 12.05%)
length=7, alignment=7: 10.97 18.02 (-64.35%) 14.59 (-33.06%) 6.36 ( 41.96%) 5.46 ( 50.25%) 5.46 ( 50.25%)
length=7, alignment=0: 10.96 15.76 (-43.77%) 15.37 (-40.15%) 6.96 ( 36.51%) 5.68 ( 48.22%) 7.04 ( 35.83%)
length=4, alignment=0: 8.66 13.69 (-58.02%) 7.28 ( 16.00%) 6.37 ( 26.44%) 6.37 ( 26.52%) 6.61 ( 23.74%)
length=4, alignment=7: 8.87 17.35 (-95.73%) 12.18 (-37.39%) 5.68 ( 35.94%) 4.87 ( 45.11%) 6.00 ( 32.36%)
length=4, alignment=2: 8.67 10.05 (-15.91%) 7.28 ( 16.01%) 7.37 ( 15.02%) 5.46 ( 37.02%) 5.47 ( 36.89%)
length=2, alignment=2: 5.64 10.01 (-77.64%) 7.29 (-29.34%) 6.37 (-13.04%) 5.46 ( 3.19%) 5.46 ( 3.19%)
length=8, alignment=0: 12.78 16.52 (-29.33%) 18.27 (-43.00%) 11.82 ( 7.47%) 9.83 ( 23.03%) 11.46 ( 10.27%)
length=8, alignment=7: 14.24 17.30 (-21.49%) 12.16 ( 14.59%) 5.68 ( 60.14%) 4.87 ( 65.83%) 6.23 ( 56.28%)
length=8, alignment=3: 12.34 26.15 (-111.98%) 12.20 ( 1.14%) 6.50 ( 47.34%) 4.87 ( 60.54%) 6.18 ( 49.94%)
length=5, alignment=3: 10.95 19.74 (-80.30%) 12.17 (-11.11%) 5.68 ( 48.16%) 4.87 ( 55.56%) 5.96 ( 45.55%)
length=16, alignment=0: 20.33 29.29 (-44.08%) 36.18 (-77.97%) 5.68 ( 72.06%) 5.68 ( 72.08%) 10.60 ( 47.86%)
length=16, alignment=7: 19.29 17.52 ( 9.16%) 12.98 ( 32.73%) 7.05 ( 63.47%) 4.87 ( 74.75%) 6.23 ( 67.71%)
length=16, alignment=4: 20.54 25.18 (-22.56%) 15.42 ( 24.92%) 7.31 ( 64.43%) 4.87 ( 76.29%) 5.98 ( 70.88%)
length=10, alignment=4: 14.59 21.26 (-45.71%) 12.17 ( 16.58%) 5.68 ( 61.07%) 4.87 ( 66.65%) 6.00 ( 58.91%)
length=32, alignment=0: 35.46 22.00 ( 37.95%) 16.22 ( 54.26%) 7.32 ( 79.35%) 5.68 ( 83.98%) 7.01 ( 80.22%)
length=32, alignment=7: 35.23 24.14 ( 31.48%) 16.22 ( 53.96%) 7.30 ( 79.28%) 8.76 ( 75.12%) 6.14 ( 82.58%)
length=32, alignment=5: 35.16 28.56 ( 18.76%) 16.22 ( 53.87%) 7.30 ( 79.23%) 6.77 ( 80.75%) 9.82 ( 72.07%)
length=21, alignment=5: 26.47 27.66 ( -4.49%) 15.04 ( 43.17%) 6.90 ( 73.95%) 4.87 ( 81.60%) 6.04 ( 77.18%)
length=64, alignment=0: 66.45 25.16 ( 62.14%) 22.70 ( 65.83%) 12.99 ( 80.44%) 7.47 ( 88.77%) 8.70 ( 86.90%)
length=64, alignment=7: 64.75 27.78 ( 57.10%) 22.72 ( 64.91%) 10.85 ( 83.25%) 7.46 ( 88.48%) 8.68 ( 86.60%)
length=64, alignment=6: 67.26 28.58 ( 57.51%) 22.70 ( 66.24%) 11.26 ( 83.25%) 9.46 ( 85.94%) 13.90 ( 79.33%)
length=42, alignment=6: 73.42 27.97 ( 61.91%) 19.46 ( 73.49%) 8.92 ( 87.84%) 6.49 ( 91.16%) 6.00 ( 91.83%)
length=128, alignment=0: 172.07 39.18 ( 77.23%) 35.68 ( 79.26%) 13.02 ( 92.43%) 12.98 ( 92.46%) 9.76 ( 94.33%)
length=128, alignment=7: 163.98 43.79 ( 73.30%) 36.03 ( 78.03%) 15.68 ( 90.44%) 11.35 ( 93.08%) 10.51 ( 93.59%)
length=128, alignment=7: 185.86 40.27 ( 78.33%) 36.04 ( 80.61%) 13.78 ( 92.58%) 11.35 ( 93.89%) 10.49 ( 94.36%)
length=85, alignment=7: 121.61 55.66 ( 54.23%) 32.34 ( 73.40%) 13.88 ( 88.59%) 7.30 ( 94.00%) 8.72 ( 92.83%)
length=256, alignment=0: 295.54 66.48 ( 77.50%) 61.63 ( 79.15%) 19.54 ( 93.39%) 12.97 ( 95.61%) 12.45 ( 95.79%)
length=256, alignment=7: 308.06 78.92 ( 74.38%) 61.63 ( 80.00%) 22.90 ( 92.57%) 12.97 ( 95.79%) 13.23 ( 95.71%)
length=256, alignment=8: 295.32 65.83 ( 77.71%) 61.62 ( 79.13%) 23.19 ( 92.15%) 12.97 ( 95.61%) 13.50 ( 95.43%)
length=170, alignment=8: 234.39 48.79 ( 79.18%) 43.79 ( 81.32%) 16.22 ( 93.08%) 13.97 ( 94.04%) 10.48 ( 95.53%)
length=512, alignment=0: 563.75 116.89 ( 79.27%) 114.99 ( 79.60%) 62.71 ( 88.88%) 19.58 ( 96.53%) 17.76 ( 96.85%)
length=512, alignment=7: 580.53 120.91 ( 79.17%) 114.47 ( 80.28%) 37.75 ( 93.50%) 19.55 ( 96.63%) 18.68 ( 96.78%)
length=512, alignment=9: 584.05 128.35 ( 78.02%) 114.74 ( 80.35%) 39.09 ( 93.31%) 19.76 ( 96.62%) 18.71 ( 96.80%)
length=341, alignment=9: 405.84 90.87 ( 77.61%) 78.79 ( 80.59%) 28.77 ( 92.91%) 14.60 ( 96.40%) 14.15 ( 96.51%)
length=1024, alignment=0: 1143.61 247.03 ( 78.40%) 243.70 ( 78.69%) 75.59 ( 93.39%) 67.02 ( 94.14%) 28.99 ( 97.46%)
length=1024, alignment=7: 1124.55 267.87 ( 76.18%) 259.16 ( 76.95%) 64.96 ( 94.22%) 33.05 ( 97.06%) 30.91 ( 97.25%)
length=1024, alignment=10: 1459.58 257.79 ( 82.34%) 239.91 ( 83.56%) 65.00 ( 95.55%) 33.10 ( 97.73%) 30.33 ( 97.92%)
length=682, alignment=10: 732.89 163.67 ( 77.67%) 170.54 ( 76.73%) 46.48 ( 93.66%) 24.32 ( 96.68%) 21.44 ( 97.07%)
length=2048, alignment=0: 2141.96 451.61 ( 78.92%) 448.00 ( 79.08%) 133.24 ( 93.78%) 61.22 ( 97.14%) 80.08 ( 96.26%)
length=2048, alignment=7: 2145.05 458.26 ( 78.64%) 449.99 ( 79.02%) 140.19 ( 93.46%) 60.26 ( 97.19%) 51.71 ( 97.59%)
length=2048, alignment=11: 2162.61 463.37 ( 78.57%) 448.07 ( 79.28%) 140.29 ( 93.51%) 59.51 ( 97.25%) 51.59 ( 97.61%)
length=1365, alignment=11: 1439.74 322.86 ( 77.58%) 310.84 ( 78.41%) 116.08 ( 91.94%) 42.43 ( 97.05%) 36.15 ( 97.49%)
length=4096, alignment=0: 4278.68 871.60 ( 79.63%) 865.25 ( 79.78%) 252.50 ( 94.10%) 161.17 ( 96.23%) 94.97 ( 97.78%)
length=4096, alignment=7: 4253.01 871.62 ( 79.51%) 864.21 ( 79.68%) 243.90 ( 94.27%) 171.17 ( 95.98%) 95.14 ( 97.76%)
length=4096, alignment=12: 4252.18 879.66 ( 79.31%) 863.68 ( 79.69%) 244.26 ( 94.26%) 185.36 ( 95.64%) 93.61 ( 97.80%)
length=2730, alignment=12: 2868.22 597.65 ( 79.16%) 586.22 ( 79.56%) 175.09 ( 93.90%) 120.35 ( 95.80%) 101.35 ( 96.47%)
length=0, alignment=0: 4.87 8.11 (-66.73%) 6.49 (-33.34%) 5.80 (-19.26%) 5.68 (-16.67%) 6.86 (-40.91%)
length=32, alignment=0: 33.82 22.36 ( 33.89%) 17.03 ( 49.66%) 7.30 ( 78.42%) 5.68 ( 83.22%) 7.50 ( 77.83%)
length=64, alignment=0: 66.20 26.76 ( 59.58%) 23.22 ( 64.93%) 12.99 ( 80.37%) 7.34 ( 88.92%) 8.44 ( 87.25%)
length=96, alignment=0: 130.26 31.62 ( 75.72%) 30.00 ( 76.97%) 11.39 ( 91.26%) 10.54 ( 91.91%) 8.68 ( 93.34%)
length=128, alignment=0: 164.66 39.05 ( 76.29%) 35.68 ( 78.33%) 13.07 ( 92.07%) 12.97 ( 92.12%) 9.59 ( 94.18%)
length=160, alignment=0: 196.63 45.18 ( 77.02%) 42.16 ( 78.56%) 14.65 ( 92.55%) 10.87 ( 94.47%) 9.31 ( 95.27%)
length=192, alignment=0: 225.50 52.71 ( 76.63%) 49.61 ( 78.00%) 16.22 ( 92.81%) 11.36 ( 94.96%) 11.08 ( 95.09%)
length=224, alignment=0: 261.08 57.57 ( 77.95%) 55.82 ( 78.62%) 17.84 ( 93.17%) 12.16 ( 95.34%) 11.51 ( 95.59%)
length=256, alignment=0: 295.13 65.56 ( 77.79%) 62.59 ( 78.79%) 19.46 ( 93.41%) 13.12 ( 95.56%) 12.33 ( 95.82%)
length=288, alignment=0: 325.69 72.16 ( 77.84%) 69.20 ( 78.75%) 21.08 ( 93.53%) 13.94 ( 95.72%) 12.32 ( 96.22%)
length=320, alignment=0: 364.18 78.78 ( 78.37%) 75.69 ( 79.21%) 22.71 ( 93.77%) 14.70 ( 95.96%) 14.46 ( 96.03%)
length=352, alignment=0: 391.40 84.87 ( 78.32%) 82.15 ( 79.01%) 24.50 ( 93.74%) 15.62 ( 96.01%) 14.27 ( 96.35%)
length=384, alignment=0: 428.50 91.43 ( 78.66%) 88.70 ( 79.30%) 26.16 ( 93.90%) 17.29 ( 95.97%) 15.04 ( 96.49%)
length=416, alignment=0: 457.30 98.23 ( 78.52%) 95.02 ( 79.22%) 27.81 ( 93.92%) 17.22 ( 96.23%) 15.05 ( 96.71%)
length=448, alignment=0: 488.38 104.52 ( 78.60%) 101.87 ( 79.14%) 31.22 ( 93.61%) 18.07 ( 96.30%) 16.89 ( 96.54%)
length=480, alignment=0: 526.44 109.61 ( 79.18%) 108.11 ( 79.46%) 31.11 ( 94.09%) 18.88 ( 96.41%) 17.10 ( 96.75%)
length=512, alignment=0: 556.50 117.29 ( 78.92%) 113.78 ( 79.56%) 62.57 ( 88.76%) 19.88 ( 96.43%) 17.80 ( 96.80%)
length=576, alignment=0: 622.17 152.93 ( 75.42%) 127.58 ( 79.49%) 39.34 ( 93.68%) 21.31 ( 96.58%) 19.99 ( 96.79%)
length=640, alignment=0: 691.01 142.56 ( 79.37%) 161.78 ( 76.59%) 39.20 ( 94.33%) 22.98 ( 96.67%) 20.13 ( 97.09%)
length=704, alignment=0: 756.90 156.31 ( 79.35%) 176.19 ( 76.72%) 45.03 ( 94.05%) 24.82 ( 96.72%) 22.33 ( 97.05%)
length=768, alignment=0: 826.23 193.17 ( 76.62%) 188.41 ( 77.20%) 50.81 ( 93.85%) 27.46 ( 96.68%) 23.25 ( 97.19%)
length=832, alignment=0: 890.17 204.81 ( 76.99%) 201.61 ( 77.35%) 53.77 ( 93.96%) 27.73 ( 96.88%) 25.06 ( 97.18%)
length=896, alignment=0: 959.52 217.89 ( 77.29%) 213.86 ( 77.71%) 57.99 ( 93.96%) 29.53 ( 96.92%) 26.29 ( 97.26%)
length=960, alignment=0: 1024.52 231.06 ( 77.45%) 227.05 ( 77.84%) 60.36 ( 94.11%) 32.29 ( 96.85%) 27.94 ( 97.27%)
length=1024, alignment=0: 1086.71 244.17 ( 77.53%) 239.87 ( 77.93%) 64.72 ( 94.04%) 72.38 ( 93.34%) 28.72 ( 97.36%)
length=1152, alignment=0: 1231.48 270.22 ( 78.06%) 266.47 ( 78.36%) 73.38 ( 94.04%) 40.24 ( 96.73%) 32.42 ( 97.37%)
length=1280, alignment=0: 1349.29 295.45 ( 78.10%) 292.69 ( 78.31%) 111.80 ( 91.71%) 42.44 ( 96.85%) 34.59 ( 97.44%)
length=1408, alignment=0: 1487.13 322.57 ( 78.31%) 318.18 ( 78.60%) 84.47 ( 94.32%) 44.35 ( 97.02%) 37.31 ( 97.49%)
length=1536, alignment=0: 1623.52 347.98 ( 78.57%) 344.24 ( 78.80%) 108.31 ( 93.33%) 49.82 ( 96.93%) 39.94 ( 97.54%)
length=1664, alignment=0: 1748.88 373.80 ( 78.63%) 370.03 ( 78.84%) 118.76 ( 93.21%) 52.89 ( 96.98%) 42.93 ( 97.55%)
length=1792, alignment=0: 1886.22 399.59 ( 78.82%) 397.39 ( 78.93%) 127.32 ( 93.25%) 53.64 ( 97.16%) 45.39 ( 97.59%)
length=1920, alignment=0: 2018.37 425.98 ( 78.89%) 422.31 ( 79.08%) 126.70 ( 93.72%) 57.08 ( 97.17%) 48.12 ( 97.62%)
length=2048, alignment=0: 2167.09 451.70 ( 79.16%) 447.70 ( 79.34%) 141.68 ( 93.46%) 61.63 ( 97.16%) 79.06 ( 96.35%)
length=2304, alignment=0: 2422.03 503.63 ( 79.21%) 502.23 ( 79.26%) 149.62 ( 93.82%) 73.10 ( 96.98%) 56.97 ( 97.65%)
length=2560, alignment=0: 2678.68 556.84 ( 79.21%) 553.24 ( 79.35%) 161.06 ( 93.99%) 127.74 ( 95.23%) 58.81 ( 97.80%)
length=2816, alignment=0: 2941.95 608.70 ( 79.31%) 604.03 ( 79.47%) 171.85 ( 94.16%) 87.11 ( 97.04%) 67.08 ( 97.72%)
length=3072, alignment=0: 3229.89 660.14 ( 79.56%) 659.19 ( 79.59%) 183.85 ( 94.31%) 140.25 ( 95.66%) 73.01 ( 97.74%)
length=3328, alignment=0: 3496.08 713.05 ( 79.60%) 710.00 ( 79.69%) 209.72 ( 94.00%) 138.78 ( 96.03%) 77.81 ( 97.77%)
length=3584, alignment=0: 3756.52 766.19 ( 79.60%) 763.94 ( 79.66%) 214.16 ( 94.30%) 146.36 ( 96.10%) 83.43 ( 97.78%)
length=3840, alignment=0: 4017.15 817.43 ( 79.65%) 819.77 ( 79.59%) 242.07 ( 93.97%) 164.56 ( 95.90%) 89.72 ( 97.77%)
length=4096, alignment=0: 4281.59 867.87 ( 79.73%) 864.71 ( 79.80%) 243.33 ( 94.32%) 173.11 ( 95.96%) 95.65 ( 97.77%)
length=4608, alignment=0: 4810.30 977.80 ( 79.67%) 985.03 ( 79.52%) 271.13 ( 94.36%) 190.62 ( 96.04%) 107.82 ( 97.76%)
```
length=5120, alignment=0: 5380.16 1075.77 ( 80.00%) 1071.80 ( 80.08%) 294.27 ( 94.53%) 206.04 ( 96.17%) 141.90 ( 97.36%)
length=5632, alignment=0: 5925.70 1195.61 ( 79.82%) 1193.68 ( 79.86%) 323.42 ( 94.54%) 223.55 ( 96.23%) 125.28 ( 97.89%)
length=6144, alignment=0: 6402.20 1285.52 ( 79.92%) 1281.04 ( 79.99%) 342.68 ( 94.65%) 234.84 ( 96.33%) 167.01 ( 97.39%)
length=6656, alignment=0: 6997.01 1387.32 ( 80.17%) 1384.21 ( 80.22%) 365.93 ( 94.77%) 269.89 ( 96.14%) 176.40 ( 97.48%)
length=7168, alignment=0: 7454.76 1492.10 ( 79.98%) 1488.45 ( 80.03%) 391.92 ( 94.74%) 280.81 ( 96.23%) 187.73 ( 97.48%)
length=7680, alignment=0: 8163.34 1608.43 ( 80.30%) 1615.98 ( 80.20%) 460.03 ( 94.36%) 299.86 ( 96.33%) 201.40 ( 97.53%)
---
Full diff: https://github.com/llvm/llvm-project/pull/152389.diff
6 Files Affected:
- (added) libc/src/string/inline_strlen.h (+38)
- (added) libc/src/string/memory_utils/aarch64/inline_strlen.h (+50)
- (added) libc/src/string/memory_utils/x86_64/inline_strlen.h (+116)
- (modified) libc/src/string/string_utils.h (+14-1)
- (modified) utils/bazel/llvm-project-overlay/libc/BUILD.bazel (+3)
- (modified) utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl (+1-1)
``````````diff
diff --git a/libc/src/string/inline_strlen.h b/libc/src/string/inline_strlen.h
new file mode 100644
index 0000000000000..7ea27cf6e1813
--- /dev/null
+++ b/libc/src/string/inline_strlen.h
@@ -0,0 +1,38 @@
+//===-- Strlen implementation -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRLEN_H
+
+#include "src/__support/macros/attributes.h" // LIBC_INLINE
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
+
+#include <stddef.h> // size_t
+
+#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/inline_strlen.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_x86
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/inline_memcpy.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_aarch64
+#else
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_wide_read
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+
+[[gnu::flatten]] LIBC_INLINE void
+inline_memcpy(void *__restrict dst, const void *__restrict src, size_t count) {
+ LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY(reinterpret_cast<Ptr>(dst),
+ reinterpret_cast<CPtr>(src), count);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRLEN_H
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
new file mode 100644
index 0000000000000..2b9f226d14187
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -0,0 +1,50 @@
+//===-- Strlen implementation for aarch64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+
+#include <arm_neon.h>
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t string_length_neon(const char* src) {
+ using Vector __attribute__((may_alias)) = uint8x8_t;
+ uintptr_t misalign_bytes = reinterpret_case<uintptr_t>(src) % sizeof(Vector);
+ Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes);
+ if (misalign_bytes) {
+ Vector v = *block_ptr;
+ Vector vcmp = vceqz_u8(v);
+ uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
+ uint64_t cmp = vget_lane_u64(cmp_mask, 0);
+ cmp = cmp >> (misalign_bytes << 3);
+ if (cmp) return __builtin_ctzl(cmp) >> 3;
+ ++block_ptr;
+ }
+ while (true) {
+ Vector v = *block_ptr;
+ Vector vcmp = vceqz_u8(v);
+ uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
+ uint64_t cmp = vget_lane_u64(cmp_mask, 0);
+ if (cmp)
+ return static_cast<size_t>(reinterpret_case<uintptr_t>(block_ptr) -
+ reinterpret_case<uintptr_t>(src) +
+ (__builtin_ctzl(cmp) >> 3));
+ block_ptr++;
+ }
+}
+
+template <typename T>
+[[maybe_unused]] LIBC_INLINE void string_length_aarch64(const char *src) {
+ return inline_string_length_neon(src);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
new file mode 100644
index 0000000000000..ffdf8938d62fd
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -0,0 +1,116 @@
+//===-- Strlen implementation for x86_64 ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
+
+#include "src/string/memory_utils/op_x86.h" // K_AVX
+
+#include <stddef.h> // size_t
+#include <x86intrin.h>
+namespace LIBC_NAMESPACE_DECL {
+
+#if defined(__SSE2__)
+[[maybe_unused]] LIBC_INLINE size_t string_length_sse2(const char *src) {
+ using Vector __attribute__((may_alias)) = __m128i;
+ Vector z = _mm_setzero_si128();
+ uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+ const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes);
+ if (misalign_bytes)
+ {
+ Vector v = _mm_load_si128 (block_ptr);
+ Vector vcmp = _mm_cmpeq_epi8 (z, v);
+ // shift away results in irrelevant bytes.
+ int cmp = _mm_movemask_epi8 (vcmp) >> misalign_bytes;
+ if (cmp)
+ return __builtin_ctz (cmp);
+ block_ptr++;
+ }
+ while (true)
+ {
+ Vector v = _mm_load_si128 (block_ptr);
+ Vector vcmp = _mm_cmpeq_epi8 (z, v);
+ int cmp = _mm_movemask_epi8 (vcmp);
+ if (cmp)
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+ reinterpret_cast<uintptr_t>(src) +
+ __builtin_ctz(cmp));
+ block_ptr++;
+ }
+}
+#endif
+
+#if defined(__AVX2__)
+[[maybe_unused]] LIBC_INLINE size_t string_length_avx2(const char *src) {
+ using Vector __attribute__((may_alias)) = __mm256i;
+ Vector z = _mm256_setzero_si256();
+ uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+ const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes);
+ if (misalign_bytes)
+ {
+ Vector v = _mm256_load_si256 (block_ptr);
+ Vector vcmp = _mm256_cmpeq_epi8 (z, v);
+ // shift away results in irrelevant bytes.
+ int cmp = _mm256_movemask_epi8 (vcmp) >> misalign_bytes;
+ if (cmp)
+ return __builtin_ctz(cmp);
+ block_ptr++;
+ }
+ while (true)
+ {
+ Vector v = _mm256_load_si256 (block_ptr);
+ Vector vcmp = _mm256_cmpeq_epi8 (z, v);
+ int cmp = _mm256_movemask_epi8 (vcmp);
+ if (cmp)
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+ reinterpret_cast<uintptr_t>(src) +
+ __builtin_ctz(cmp));
+ block_ptr++;
+ }
+}
+#endif // __AVX__
+
+#if defined(__AVX512F__)
+[[maybe_unused]] LIBC_INLINE size_t string_length_avx512(const char *src) {
+ using Vector __attribute__((may_alias)) = __mm512i;
+ Vector z = _mm512_setzero_si512();
+ uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+ const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes);
+ if (misalign_bytes) {
+ Vector v = _mm512_load_si512(block_ptr);
+ __mmask64 cmp = _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ) >> misalign_bytes;
+ if (cmp)
+ return __builtin_ctzl(cmp);
+ block_ptr++;
+ }
+ while (true)
+ {
+ Vector v = _mm512_load_si512(block_ptr);
+ __mmask64 cmp = _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
+ if (cmp)
+ return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+ reinterpret_cast<uintptr_t>(src) +
+ __builtin_ctz(cmp));
+ block_ptr++;
+ }
+}
+#endif // __AVX512F__
+
+template<typename T> LIBC_INLINE
+size_t string_length_x86_64(const char *src) {
+#if defined(__AVX512F__)
+ return string_length_avx512(src);
+#endif
+#if defined(__AVX__)
+ return string_length_avx2(src);
+#endif
+ return string_length_sse2(src);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 80e5783c7890b..8312ef895b243 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -22,6 +22,19 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/inline_strlen.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_x86_64
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/inline_strlen.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_aarch64
+#else
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_wide_read
+#endif
+#endif
+
namespace LIBC_NAMESPACE_DECL {
namespace internal {
@@ -90,7 +103,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
// be aligned to a word boundary, so it's the size we use for reading the
// string a block at a time.
if constexpr (cpp::is_same_v<T, char>)
- return string_length_wide_read<unsigned int>(src);
+ return LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ<unsigned int>(src);
#endif
size_t length;
for (length = 0; *src; ++src, ++length)
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 6a9bd09a2ed56..68e6e86f5a9f1 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -4865,6 +4865,7 @@ libc_support_library(
"src/string/memory_utils/aarch64/inline_memcpy.h",
"src/string/memory_utils/aarch64/inline_memmove.h",
"src/string/memory_utils/aarch64/inline_memset.h",
+ "src/string/memory_utils/aarch64/inline_strlen.h",
"src/string/memory_utils/arm/common.h",
"src/string/memory_utils/arm/inline_memcpy.h",
"src/string/memory_utils/arm/inline_memset.h",
@@ -4889,6 +4890,7 @@ libc_support_library(
"src/string/memory_utils/x86_64/inline_memcpy.h",
"src/string/memory_utils/x86_64/inline_memmove.h",
"src/string/memory_utils/x86_64/inline_memset.h",
+ "src/string/memory_utils/x86_64/inline_strlen.h",
],
deps = [
":__support_common",
@@ -4913,6 +4915,7 @@ libc_support_library(
":__support_macros_optimization",
":hdr_limits_macros",
":llvm_libc_types_size_t",
+ ":string_memory_utils",
":types_size_t",
],
)
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
index b49e7c3fad954..209cd5ae80d29 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
@@ -39,7 +39,7 @@ LIBC_CONFIGURE_OPTIONS = [
# "LIBC_COPT_SCANF_DISABLE_FLOAT",
# "LIBC_COPT_SCANF_DISABLE_INDEX_MODE",
"LIBC_COPT_STDIO_USE_SYSTEM_FILE",
- # "LIBC_COPT_STRING_UNSAFE_WIDE_READ",
+ "LIBC_COPT_STRING_UNSAFE_WIDE_READ",
# "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH",
# "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE",
# "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION",
``````````
</details>
https://github.com/llvm/llvm-project/pull/152389
More information about the llvm-commits
mailing list