[libc] [llvm] Add vector-based strlen implementation for x86_64 and aarch64 (PR #152389)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 6 14:19:21 PDT 2025


https://github.com/Sterling-Augustine created https://github.com/llvm/llvm-project/pull/152389

These replace the default LIBC_CONF_STRING_UNSAFE_WIDE_READ implementation
on x86_64 and aarch64.

These are substantially faster than both the character-by-character
implementation and the original unsafe_wide_read implementation. Some below
I have been unable to performance-test the aarch64 version, but I suspect
speedups similar to avx2.

```
Function: strlen
Variant:
                                    char	       wide              	ull	        sse2	                 avx2           	avx512
=============================================================================================================================================================
   length=1, alignment=1:        13.18	       20.47 (-55.24%)	       20.21 (-53.27%)	       32.50 (-146.54%)	       26.05 (-97.61%)	       18.03 (-36.74%)
   length=1, alignment=0:        12.80	       34.92 (-172.89%)	       20.01 (-56.39%)	       17.52 (-36.86%)	       17.78 (-38.92%)	       18.04 (-40.94%)
   length=2, alignment=2:         9.91	       19.02 (-91.95%)	       12.64 (-27.52%)	       11.06 (-11.59%)	        9.48 (  4.38%)	        9.48 (  4.34%)
   length=2, alignment=0:         9.56	       26.88 (-181.24%)	       12.64 (-32.31%)	       11.06 (-15.73%)	       11.06 (-15.72%)	       11.83 (-23.80%)
   length=3, alignment=3:         8.31	       10.45 (-25.84%)	        8.28 (  0.32%)	        8.28 (  0.36%)	        6.21 ( 25.28%)	        6.21 ( 25.24%)
   length=3, alignment=0:         8.39	       14.53 (-73.20%)	        8.28 (  1.33%)	        7.24 ( 13.69%)	        7.56 (  9.94%)	        7.25 ( 13.65%)
   length=4, alignment=4:         9.84	       21.76 (-121.24%)	       15.55 (-58.11%)	        6.57 ( 33.18%)	        5.02 ( 48.98%)	        6.00 ( 39.00%)
   length=4, alignment=0:         8.64	       13.70 (-58.51%)	        7.28 ( 15.73%)	        6.37 ( 26.31%)	        6.36 ( 26.36%)	        6.36 ( 26.36%)
   length=5, alignment=5:        11.85	       23.81 (-100.97%)	       12.17 ( -2.67%)	        5.68 ( 52.09%)	        4.87 ( 58.94%)	        6.48 ( 45.33%)
   length=5, alignment=0:        11.82	       13.64 (-15.42%)	        7.27 ( 38.45%)	        6.36 ( 46.15%)	        6.37 ( 46.11%)	        6.36 ( 46.14%)
   length=6, alignment=6:        10.50	       19.37 (-84.56%)	       13.64 (-29.93%)	        6.54 ( 37.71%)	        6.89 ( 34.35%)	        9.45 ( 10.01%)
   length=6, alignment=0:        14.96	       14.05 (  6.04%)	        6.49 ( 56.62%)	        5.68 ( 62.04%)	        5.68 ( 62.04%)	       13.15 ( 12.05%)
   length=7, alignment=7:        10.97	       18.02 (-64.35%)	       14.59 (-33.06%)	        6.36 ( 41.96%)	        5.46 ( 50.25%)	        5.46 ( 50.25%)
   length=7, alignment=0:        10.96	       15.76 (-43.77%)	       15.37 (-40.15%)	        6.96 ( 36.51%)	        5.68 ( 48.22%)	        7.04 ( 35.83%)
   length=4, alignment=0:         8.66	       13.69 (-58.02%)	        7.28 ( 16.00%)	        6.37 ( 26.44%)	        6.37 ( 26.52%)	        6.61 ( 23.74%)
   length=4, alignment=7:         8.87	       17.35 (-95.73%)	       12.18 (-37.39%)	        5.68 ( 35.94%)	        4.87 ( 45.11%)	        6.00 ( 32.36%)
   length=4, alignment=2:         8.67	       10.05 (-15.91%)	        7.28 ( 16.01%)	        7.37 ( 15.02%)	        5.46 ( 37.02%)	        5.47 ( 36.89%)
   length=2, alignment=2:         5.64	       10.01 (-77.64%)	        7.29 (-29.34%)	        6.37 (-13.04%)	        5.46 (  3.19%)	        5.46 (  3.19%)
   length=8, alignment=0:        12.78	       16.52 (-29.33%)	       18.27 (-43.00%)	       11.82 (  7.47%)	        9.83 ( 23.03%)	       11.46 ( 10.27%)
   length=8, alignment=7:        14.24	       17.30 (-21.49%)	       12.16 ( 14.59%)	        5.68 ( 60.14%)	        4.87 ( 65.83%)	        6.23 ( 56.28%)
   length=8, alignment=3:        12.34	       26.15 (-111.98%)	       12.20 (  1.14%)	        6.50 ( 47.34%)	        4.87 ( 60.54%)	        6.18 ( 49.94%)
   length=5, alignment=3:        10.95	       19.74 (-80.30%)	       12.17 (-11.11%)	        5.68 ( 48.16%)	        4.87 ( 55.56%)	        5.96 ( 45.55%)
   length=16, alignment=0:        20.33	       29.29 (-44.08%)	       36.18 (-77.97%)	        5.68 ( 72.06%)	        5.68 ( 72.08%)	       10.60 ( 47.86%)
   length=16, alignment=7:        19.29	       17.52 (  9.16%)	       12.98 ( 32.73%)	        7.05 ( 63.47%)	        4.87 ( 74.75%)	        6.23 ( 67.71%)
   length=16, alignment=4:        20.54	       25.18 (-22.56%)	       15.42 ( 24.92%)	        7.31 ( 64.43%)	        4.87 ( 76.29%)	        5.98 ( 70.88%)
   length=10, alignment=4:        14.59	       21.26 (-45.71%)	       12.17 ( 16.58%)	        5.68 ( 61.07%)	        4.87 ( 66.65%)	        6.00 ( 58.91%)
   length=32, alignment=0:        35.46	       22.00 ( 37.95%)	       16.22 ( 54.26%)	        7.32 ( 79.35%)	        5.68 ( 83.98%)	        7.01 ( 80.22%)
   length=32, alignment=7:        35.23	       24.14 ( 31.48%)	       16.22 ( 53.96%)	        7.30 ( 79.28%)	        8.76 ( 75.12%)	        6.14 ( 82.58%)
   length=32, alignment=5:        35.16	       28.56 ( 18.76%)	       16.22 ( 53.87%)	        7.30 ( 79.23%)	        6.77 ( 80.75%)	        9.82 ( 72.07%)
   length=21, alignment=5:        26.47	       27.66 ( -4.49%)	       15.04 ( 43.17%)	        6.90 ( 73.95%)	        4.87 ( 81.60%)	        6.04 ( 77.18%)
   length=64, alignment=0:        66.45	       25.16 ( 62.14%)	       22.70 ( 65.83%)	       12.99 ( 80.44%)	        7.47 ( 88.77%)	        8.70 ( 86.90%)
   length=64, alignment=7:        64.75	       27.78 ( 57.10%)	       22.72 ( 64.91%)	       10.85 ( 83.25%)	        7.46 ( 88.48%)	        8.68 ( 86.60%)
   length=64, alignment=6:        67.26	       28.58 ( 57.51%)	       22.70 ( 66.24%)	       11.26 ( 83.25%)	        9.46 ( 85.94%)	       13.90 ( 79.33%)
   length=42, alignment=6:        73.42	       27.97 ( 61.91%)	       19.46 ( 73.49%)	        8.92 ( 87.84%)	        6.49 ( 91.16%)	        6.00 ( 91.83%)
  length=128, alignment=0:       172.07	       39.18 ( 77.23%)	       35.68 ( 79.26%)	       13.02 ( 92.43%)	       12.98 ( 92.46%)	        9.76 ( 94.33%)
  length=128, alignment=7:       163.98	       43.79 ( 73.30%)	       36.03 ( 78.03%)	       15.68 ( 90.44%)	       11.35 ( 93.08%)	       10.51 ( 93.59%)
  length=128, alignment=7:       185.86	       40.27 ( 78.33%)	       36.04 ( 80.61%)	       13.78 ( 92.58%)	       11.35 ( 93.89%)	       10.49 ( 94.36%)
   length=85, alignment=7:       121.61	       55.66 ( 54.23%)	       32.34 ( 73.40%)	       13.88 ( 88.59%)	        7.30 ( 94.00%)	        8.72 ( 92.83%)
  length=256, alignment=0:       295.54	       66.48 ( 77.50%)	       61.63 ( 79.15%)	       19.54 ( 93.39%)	       12.97 ( 95.61%)	       12.45 ( 95.79%)
  length=256, alignment=7:       308.06	       78.92 ( 74.38%)	       61.63 ( 80.00%)	       22.90 ( 92.57%)	       12.97 ( 95.79%)	       13.23 ( 95.71%)
  length=256, alignment=8:       295.32	       65.83 ( 77.71%)	       61.62 ( 79.13%)	       23.19 ( 92.15%)	       12.97 ( 95.61%)	       13.50 ( 95.43%)
  length=170, alignment=8:       234.39	       48.79 ( 79.18%)	       43.79 ( 81.32%)	       16.22 ( 93.08%)	       13.97 ( 94.04%)	       10.48 ( 95.53%)
  length=512, alignment=0:       563.75	      116.89 ( 79.27%)	      114.99 ( 79.60%)	       62.71 ( 88.88%)	       19.58 ( 96.53%)	       17.76 ( 96.85%)
  length=512, alignment=7:       580.53	      120.91 ( 79.17%)	      114.47 ( 80.28%)	       37.75 ( 93.50%)	       19.55 ( 96.63%)	       18.68 ( 96.78%)
  length=512, alignment=9:       584.05	      128.35 ( 78.02%)	      114.74 ( 80.35%)	       39.09 ( 93.31%)	       19.76 ( 96.62%)	       18.71 ( 96.80%)
  length=341, alignment=9:       405.84	       90.87 ( 77.61%)	       78.79 ( 80.59%)	       28.77 ( 92.91%)	       14.60 ( 96.40%)	       14.15 ( 96.51%)
 length=1024, alignment=0:      1143.61	      247.03 ( 78.40%)	      243.70 ( 78.69%)	       75.59 ( 93.39%)	       67.02 ( 94.14%)	       28.99 ( 97.46%)
 length=1024, alignment=7:      1124.55	      267.87 ( 76.18%)	      259.16 ( 76.95%)	       64.96 ( 94.22%)	       33.05 ( 97.06%)	       30.91 ( 97.25%)
length=1024, alignment=10:      1459.58	      257.79 ( 82.34%)	      239.91 ( 83.56%)	       65.00 ( 95.55%)	       33.10 ( 97.73%)	       30.33 ( 97.92%)
 length=682, alignment=10:       732.89	      163.67 ( 77.67%)	      170.54 ( 76.73%)	       46.48 ( 93.66%)	       24.32 ( 96.68%)	       21.44 ( 97.07%)
 length=2048, alignment=0:      2141.96	      451.61 ( 78.92%)	      448.00 ( 79.08%)	      133.24 ( 93.78%)	       61.22 ( 97.14%)	       80.08 ( 96.26%)
 length=2048, alignment=7:      2145.05	      458.26 ( 78.64%)	      449.99 ( 79.02%)	      140.19 ( 93.46%)	       60.26 ( 97.19%)	       51.71 ( 97.59%)
length=2048, alignment=11:      2162.61	      463.37 ( 78.57%)	      448.07 ( 79.28%)	      140.29 ( 93.51%)	       59.51 ( 97.25%)	       51.59 ( 97.61%)
length=1365, alignment=11:      1439.74	      322.86 ( 77.58%)	      310.84 ( 78.41%)	      116.08 ( 91.94%)	       42.43 ( 97.05%)	       36.15 ( 97.49%)
 length=4096, alignment=0:      4278.68	      871.60 ( 79.63%)	      865.25 ( 79.78%)	      252.50 ( 94.10%)	      161.17 ( 96.23%)	       94.97 ( 97.78%)
 length=4096, alignment=7:      4253.01	      871.62 ( 79.51%)	      864.21 ( 79.68%)	      243.90 ( 94.27%)	      171.17 ( 95.98%)	       95.14 ( 97.76%)
length=4096, alignment=12:      4252.18	      879.66 ( 79.31%)	      863.68 ( 79.69%)	      244.26 ( 94.26%)	      185.36 ( 95.64%)	       93.61 ( 97.80%)
length=2730, alignment=12:      2868.22	      597.65 ( 79.16%)	      586.22 ( 79.56%)	      175.09 ( 93.90%)	      120.35 ( 95.80%)	      101.35 ( 96.47%)
    length=0, alignment=0:         4.87	        8.11 (-66.73%)	        6.49 (-33.34%)	        5.80 (-19.26%)	        5.68 (-16.67%)	        6.86 (-40.91%)
   length=32, alignment=0:        33.82	       22.36 ( 33.89%)	       17.03 ( 49.66%)	        7.30 ( 78.42%)	        5.68 ( 83.22%)	        7.50 ( 77.83%)
   length=64, alignment=0:        66.20	       26.76 ( 59.58%)	       23.22 ( 64.93%)	       12.99 ( 80.37%)	        7.34 ( 88.92%)	        8.44 ( 87.25%)
   length=96, alignment=0:       130.26	       31.62 ( 75.72%)	       30.00 ( 76.97%)	       11.39 ( 91.26%)	       10.54 ( 91.91%)	        8.68 ( 93.34%)
  length=128, alignment=0:       164.66	       39.05 ( 76.29%)	       35.68 ( 78.33%)	       13.07 ( 92.07%)	       12.97 ( 92.12%)	        9.59 ( 94.18%)
  length=160, alignment=0:       196.63	       45.18 ( 77.02%)	       42.16 ( 78.56%)	       14.65 ( 92.55%)	       10.87 ( 94.47%)	        9.31 ( 95.27%)
  length=192, alignment=0:       225.50	       52.71 ( 76.63%)	       49.61 ( 78.00%)	       16.22 ( 92.81%)	       11.36 ( 94.96%)	       11.08 ( 95.09%)
  length=224, alignment=0:       261.08	       57.57 ( 77.95%)	       55.82 ( 78.62%)	       17.84 ( 93.17%)	       12.16 ( 95.34%)	       11.51 ( 95.59%)
  length=256, alignment=0:       295.13	       65.56 ( 77.79%)	       62.59 ( 78.79%)	       19.46 ( 93.41%)	       13.12 ( 95.56%)	       12.33 ( 95.82%)
  length=288, alignment=0:       325.69	       72.16 ( 77.84%)	       69.20 ( 78.75%)	       21.08 ( 93.53%)	       13.94 ( 95.72%)	       12.32 ( 96.22%)
  length=320, alignment=0:       364.18	       78.78 ( 78.37%)	       75.69 ( 79.21%)	       22.71 ( 93.77%)	       14.70 ( 95.96%)	       14.46 ( 96.03%)
  length=352, alignment=0:       391.40	       84.87 ( 78.32%)	       82.15 ( 79.01%)	       24.50 ( 93.74%)	       15.62 ( 96.01%)	       14.27 ( 96.35%)
  length=384, alignment=0:       428.50	       91.43 ( 78.66%)	       88.70 ( 79.30%)	       26.16 ( 93.90%)	       17.29 ( 95.97%)	       15.04 ( 96.49%)
  length=416, alignment=0:       457.30	       98.23 ( 78.52%)	       95.02 ( 79.22%)	       27.81 ( 93.92%)	       17.22 ( 96.23%)	       15.05 ( 96.71%)
  length=448, alignment=0:       488.38	      104.52 ( 78.60%)	      101.87 ( 79.14%)	       31.22 ( 93.61%)	       18.07 ( 96.30%)	       16.89 ( 96.54%)
  length=480, alignment=0:       526.44	      109.61 ( 79.18%)	      108.11 ( 79.46%)	       31.11 ( 94.09%)	       18.88 ( 96.41%)	       17.10 ( 96.75%)
  length=512, alignment=0:       556.50	      117.29 ( 78.92%)	      113.78 ( 79.56%)	       62.57 ( 88.76%)	       19.88 ( 96.43%)	       17.80 ( 96.80%)
  length=576, alignment=0:       622.17	      152.93 ( 75.42%)	      127.58 ( 79.49%)	       39.34 ( 93.68%)	       21.31 ( 96.58%)	       19.99 ( 96.79%)
  length=640, alignment=0:       691.01	      142.56 ( 79.37%)	      161.78 ( 76.59%)	       39.20 ( 94.33%)	       22.98 ( 96.67%)	       20.13 ( 97.09%)
  length=704, alignment=0:       756.90	      156.31 ( 79.35%)	      176.19 ( 76.72%)	       45.03 ( 94.05%)	       24.82 ( 96.72%)	       22.33 ( 97.05%)
  length=768, alignment=0:       826.23	      193.17 ( 76.62%)	      188.41 ( 77.20%)	       50.81 ( 93.85%)	       27.46 ( 96.68%)	       23.25 ( 97.19%)
  length=832, alignment=0:       890.17	      204.81 ( 76.99%)	      201.61 ( 77.35%)	       53.77 ( 93.96%)	       27.73 ( 96.88%)	       25.06 ( 97.18%)
  length=896, alignment=0:       959.52	      217.89 ( 77.29%)	      213.86 ( 77.71%)	       57.99 ( 93.96%)	       29.53 ( 96.92%)	       26.29 ( 97.26%)
  length=960, alignment=0:      1024.52	      231.06 ( 77.45%)	      227.05 ( 77.84%)	       60.36 ( 94.11%)	       32.29 ( 96.85%)	       27.94 ( 97.27%)
 length=1024, alignment=0:      1086.71	      244.17 ( 77.53%)	      239.87 ( 77.93%)	       64.72 ( 94.04%)	       72.38 ( 93.34%)	       28.72 ( 97.36%)
 length=1152, alignment=0:      1231.48	      270.22 ( 78.06%)	      266.47 ( 78.36%)	       73.38 ( 94.04%)	       40.24 ( 96.73%)	       32.42 ( 97.37%)
 length=1280, alignment=0:      1349.29	      295.45 ( 78.10%)	      292.69 ( 78.31%)	      111.80 ( 91.71%)	       42.44 ( 96.85%)	       34.59 ( 97.44%)
 length=1408, alignment=0:      1487.13	      322.57 ( 78.31%)	      318.18 ( 78.60%)	       84.47 ( 94.32%)	       44.35 ( 97.02%)	       37.31 ( 97.49%)
 length=1536, alignment=0:      1623.52	      347.98 ( 78.57%)	      344.24 ( 78.80%)	      108.31 ( 93.33%)	       49.82 ( 96.93%)	       39.94 ( 97.54%)
 length=1664, alignment=0:      1748.88	      373.80 ( 78.63%)	      370.03 ( 78.84%)	      118.76 ( 93.21%)	       52.89 ( 96.98%)	       42.93 ( 97.55%)
 length=1792, alignment=0:      1886.22	      399.59 ( 78.82%)	      397.39 ( 78.93%)	      127.32 ( 93.25%)	       53.64 ( 97.16%)	       45.39 ( 97.59%)
 length=1920, alignment=0:      2018.37	      425.98 ( 78.89%)	      422.31 ( 79.08%)	      126.70 ( 93.72%)	       57.08 ( 97.17%)	       48.12 ( 97.62%)
 length=2048, alignment=0:      2167.09	      451.70 ( 79.16%)	      447.70 ( 79.34%)	      141.68 ( 93.46%)	       61.63 ( 97.16%)	       79.06 ( 96.35%)
 length=2304, alignment=0:      2422.03	      503.63 ( 79.21%)	      502.23 ( 79.26%)	      149.62 ( 93.82%)	       73.10 ( 96.98%)	       56.97 ( 97.65%)
 length=2560, alignment=0:      2678.68	      556.84 ( 79.21%)	      553.24 ( 79.35%)	      161.06 ( 93.99%)	      127.74 ( 95.23%)	       58.81 ( 97.80%)
 length=2816, alignment=0:      2941.95	      608.70 ( 79.31%)	      604.03 ( 79.47%)	      171.85 ( 94.16%)	       87.11 ( 97.04%)	       67.08 ( 97.72%)
 length=3072, alignment=0:      3229.89	      660.14 ( 79.56%)	      659.19 ( 79.59%)	      183.85 ( 94.31%)	      140.25 ( 95.66%)	       73.01 ( 97.74%)
 length=3328, alignment=0:      3496.08	      713.05 ( 79.60%)	      710.00 ( 79.69%)	      209.72 ( 94.00%)	      138.78 ( 96.03%)	       77.81 ( 97.77%)
 length=3584, alignment=0:      3756.52	      766.19 ( 79.60%)	      763.94 ( 79.66%)	      214.16 ( 94.30%)	      146.36 ( 96.10%)	       83.43 ( 97.78%)
 length=3840, alignment=0:      4017.15	      817.43 ( 79.65%)	      819.77 ( 79.59%)	      242.07 ( 93.97%)	      164.56 ( 95.90%)	       89.72 ( 97.77%)
 length=4096, alignment=0:      4281.59	      867.87 ( 79.73%)	      864.71 ( 79.80%)	      243.33 ( 94.32%)	      173.11 ( 95.96%)	       95.65 ( 97.77%)
 length=4608, alignment=0:      4810.30	      977.80 ( 79.67%)	      985.03 ( 79.52%)	      271.13 ( 94.36%)	      190.62 ( 96.04%)	      107.82 ( 97.76%)
```
 length=5120, alignment=0:      5380.16	     1075.77 ( 80.00%)	     1071.80 ( 80.08%)	      294.27 ( 94.53%)	      206.04 ( 96.17%)	      141.90 ( 97.36%)
 length=5632, alignment=0:      5925.70	     1195.61 ( 79.82%)	     1193.68 ( 79.86%)	      323.42 ( 94.54%)	      223.55 ( 96.23%)	      125.28 ( 97.89%)
 length=6144, alignment=0:      6402.20	     1285.52 ( 79.92%)	     1281.04 ( 79.99%)	      342.68 ( 94.65%)	      234.84 ( 96.33%)	      167.01 ( 97.39%)
 length=6656, alignment=0:      6997.01	     1387.32 ( 80.17%)	     1384.21 ( 80.22%)	      365.93 ( 94.77%)	      269.89 ( 96.14%)	      176.40 ( 97.48%)
 length=7168, alignment=0:      7454.76	     1492.10 ( 79.98%)	     1488.45 ( 80.03%)	      391.92 ( 94.74%)	      280.81 ( 96.23%)	      187.73 ( 97.48%)
 length=7680, alignment=0:      8163.34	     1608.43 ( 80.30%)	     1615.98 ( 80.20%)	      460.03 ( 94.36%)	      299.86 ( 96.33%)	      201.40 ( 97.53%)

>From 9a4760a2209bdba900c8b49e4054fee631bf0e41 Mon Sep 17 00:00:00 2001
From: Sterling Augustine <saugustine at google.com>
Date: Mon, 4 Aug 2025 09:34:59 -0700
Subject: [PATCH] Add vector-based strlen implementation for x86_64 and aarch64

These replace the default LIBC_CONF_STRING_UNSAFE_WIDE_READ implementation
on x86_64 and aarch64.

These are substantially faster than both the character-by-character
implementation and the original unsafe_wide_read implementation. Some data below.
I have been unable to performance-test the aarch64 version, but I suspect
speedups similar to avx2.

Function: strlen
Variant:
                                    char	       wide              	ull	        sse2	                 avx2           	avx512
=============================================================================================================================================================
   length=1, alignment=1:        13.18	       20.47 (-55.24%)	       20.21 (-53.27%)	       32.50 (-146.54%)	       26.05 (-97.61%)	       18.03 (-36.74%)
   length=1, alignment=0:        12.80	       34.92 (-172.89%)	       20.01 (-56.39%)	       17.52 (-36.86%)	       17.78 (-38.92%)	       18.04 (-40.94%)
   length=2, alignment=2:         9.91	       19.02 (-91.95%)	       12.64 (-27.52%)	       11.06 (-11.59%)	        9.48 (  4.38%)	        9.48 (  4.34%)
   length=2, alignment=0:         9.56	       26.88 (-181.24%)	       12.64 (-32.31%)	       11.06 (-15.73%)	       11.06 (-15.72%)	       11.83 (-23.80%)
   length=3, alignment=3:         8.31	       10.45 (-25.84%)	        8.28 (  0.32%)	        8.28 (  0.36%)	        6.21 ( 25.28%)	        6.21 ( 25.24%)
   length=3, alignment=0:         8.39	       14.53 (-73.20%)	        8.28 (  1.33%)	        7.24 ( 13.69%)	        7.56 (  9.94%)	        7.25 ( 13.65%)
   length=4, alignment=4:         9.84	       21.76 (-121.24%)	       15.55 (-58.11%)	        6.57 ( 33.18%)	        5.02 ( 48.98%)	        6.00 ( 39.00%)
   length=4, alignment=0:         8.64	       13.70 (-58.51%)	        7.28 ( 15.73%)	        6.37 ( 26.31%)	        6.36 ( 26.36%)	        6.36 ( 26.36%)
   length=5, alignment=5:        11.85	       23.81 (-100.97%)	       12.17 ( -2.67%)	        5.68 ( 52.09%)	        4.87 ( 58.94%)	        6.48 ( 45.33%)
   length=5, alignment=0:        11.82	       13.64 (-15.42%)	        7.27 ( 38.45%)	        6.36 ( 46.15%)	        6.37 ( 46.11%)	        6.36 ( 46.14%)
   length=6, alignment=6:        10.50	       19.37 (-84.56%)	       13.64 (-29.93%)	        6.54 ( 37.71%)	        6.89 ( 34.35%)	        9.45 ( 10.01%)
   length=6, alignment=0:        14.96	       14.05 (  6.04%)	        6.49 ( 56.62%)	        5.68 ( 62.04%)	        5.68 ( 62.04%)	       13.15 ( 12.05%)
   length=7, alignment=7:        10.97	       18.02 (-64.35%)	       14.59 (-33.06%)	        6.36 ( 41.96%)	        5.46 ( 50.25%)	        5.46 ( 50.25%)
   length=7, alignment=0:        10.96	       15.76 (-43.77%)	       15.37 (-40.15%)	        6.96 ( 36.51%)	        5.68 ( 48.22%)	        7.04 ( 35.83%)
   length=4, alignment=0:         8.66	       13.69 (-58.02%)	        7.28 ( 16.00%)	        6.37 ( 26.44%)	        6.37 ( 26.52%)	        6.61 ( 23.74%)
   length=4, alignment=7:         8.87	       17.35 (-95.73%)	       12.18 (-37.39%)	        5.68 ( 35.94%)	        4.87 ( 45.11%)	        6.00 ( 32.36%)
   length=4, alignment=2:         8.67	       10.05 (-15.91%)	        7.28 ( 16.01%)	        7.37 ( 15.02%)	        5.46 ( 37.02%)	        5.47 ( 36.89%)
   length=2, alignment=2:         5.64	       10.01 (-77.64%)	        7.29 (-29.34%)	        6.37 (-13.04%)	        5.46 (  3.19%)	        5.46 (  3.19%)
   length=8, alignment=0:        12.78	       16.52 (-29.33%)	       18.27 (-43.00%)	       11.82 (  7.47%)	        9.83 ( 23.03%)	       11.46 ( 10.27%)
   length=8, alignment=7:        14.24	       17.30 (-21.49%)	       12.16 ( 14.59%)	        5.68 ( 60.14%)	        4.87 ( 65.83%)	        6.23 ( 56.28%)
   length=8, alignment=3:        12.34	       26.15 (-111.98%)	       12.20 (  1.14%)	        6.50 ( 47.34%)	        4.87 ( 60.54%)	        6.18 ( 49.94%)
   length=5, alignment=3:        10.95	       19.74 (-80.30%)	       12.17 (-11.11%)	        5.68 ( 48.16%)	        4.87 ( 55.56%)	        5.96 ( 45.55%)
   length=16, alignment=0:        20.33	       29.29 (-44.08%)	       36.18 (-77.97%)	        5.68 ( 72.06%)	        5.68 ( 72.08%)	       10.60 ( 47.86%)
   length=16, alignment=7:        19.29	       17.52 (  9.16%)	       12.98 ( 32.73%)	        7.05 ( 63.47%)	        4.87 ( 74.75%)	        6.23 ( 67.71%)
   length=16, alignment=4:        20.54	       25.18 (-22.56%)	       15.42 ( 24.92%)	        7.31 ( 64.43%)	        4.87 ( 76.29%)	        5.98 ( 70.88%)
   length=10, alignment=4:        14.59	       21.26 (-45.71%)	       12.17 ( 16.58%)	        5.68 ( 61.07%)	        4.87 ( 66.65%)	        6.00 ( 58.91%)
   length=32, alignment=0:        35.46	       22.00 ( 37.95%)	       16.22 ( 54.26%)	        7.32 ( 79.35%)	        5.68 ( 83.98%)	        7.01 ( 80.22%)
   length=32, alignment=7:        35.23	       24.14 ( 31.48%)	       16.22 ( 53.96%)	        7.30 ( 79.28%)	        8.76 ( 75.12%)	        6.14 ( 82.58%)
   length=32, alignment=5:        35.16	       28.56 ( 18.76%)	       16.22 ( 53.87%)	        7.30 ( 79.23%)	        6.77 ( 80.75%)	        9.82 ( 72.07%)
   length=21, alignment=5:        26.47	       27.66 ( -4.49%)	       15.04 ( 43.17%)	        6.90 ( 73.95%)	        4.87 ( 81.60%)	        6.04 ( 77.18%)
   length=64, alignment=0:        66.45	       25.16 ( 62.14%)	       22.70 ( 65.83%)	       12.99 ( 80.44%)	        7.47 ( 88.77%)	        8.70 ( 86.90%)
   length=64, alignment=7:        64.75	       27.78 ( 57.10%)	       22.72 ( 64.91%)	       10.85 ( 83.25%)	        7.46 ( 88.48%)	        8.68 ( 86.60%)
   length=64, alignment=6:        67.26	       28.58 ( 57.51%)	       22.70 ( 66.24%)	       11.26 ( 83.25%)	        9.46 ( 85.94%)	       13.90 ( 79.33%)
   length=42, alignment=6:        73.42	       27.97 ( 61.91%)	       19.46 ( 73.49%)	        8.92 ( 87.84%)	        6.49 ( 91.16%)	        6.00 ( 91.83%)
  length=128, alignment=0:       172.07	       39.18 ( 77.23%)	       35.68 ( 79.26%)	       13.02 ( 92.43%)	       12.98 ( 92.46%)	        9.76 ( 94.33%)
  length=128, alignment=7:       163.98	       43.79 ( 73.30%)	       36.03 ( 78.03%)	       15.68 ( 90.44%)	       11.35 ( 93.08%)	       10.51 ( 93.59%)
  length=128, alignment=7:       185.86	       40.27 ( 78.33%)	       36.04 ( 80.61%)	       13.78 ( 92.58%)	       11.35 ( 93.89%)	       10.49 ( 94.36%)
   length=85, alignment=7:       121.61	       55.66 ( 54.23%)	       32.34 ( 73.40%)	       13.88 ( 88.59%)	        7.30 ( 94.00%)	        8.72 ( 92.83%)
  length=256, alignment=0:       295.54	       66.48 ( 77.50%)	       61.63 ( 79.15%)	       19.54 ( 93.39%)	       12.97 ( 95.61%)	       12.45 ( 95.79%)
  length=256, alignment=7:       308.06	       78.92 ( 74.38%)	       61.63 ( 80.00%)	       22.90 ( 92.57%)	       12.97 ( 95.79%)	       13.23 ( 95.71%)
  length=256, alignment=8:       295.32	       65.83 ( 77.71%)	       61.62 ( 79.13%)	       23.19 ( 92.15%)	       12.97 ( 95.61%)	       13.50 ( 95.43%)
  length=170, alignment=8:       234.39	       48.79 ( 79.18%)	       43.79 ( 81.32%)	       16.22 ( 93.08%)	       13.97 ( 94.04%)	       10.48 ( 95.53%)
  length=512, alignment=0:       563.75	      116.89 ( 79.27%)	      114.99 ( 79.60%)	       62.71 ( 88.88%)	       19.58 ( 96.53%)	       17.76 ( 96.85%)
  length=512, alignment=7:       580.53	      120.91 ( 79.17%)	      114.47 ( 80.28%)	       37.75 ( 93.50%)	       19.55 ( 96.63%)	       18.68 ( 96.78%)
  length=512, alignment=9:       584.05	      128.35 ( 78.02%)	      114.74 ( 80.35%)	       39.09 ( 93.31%)	       19.76 ( 96.62%)	       18.71 ( 96.80%)
  length=341, alignment=9:       405.84	       90.87 ( 77.61%)	       78.79 ( 80.59%)	       28.77 ( 92.91%)	       14.60 ( 96.40%)	       14.15 ( 96.51%)
 length=1024, alignment=0:      1143.61	      247.03 ( 78.40%)	      243.70 ( 78.69%)	       75.59 ( 93.39%)	       67.02 ( 94.14%)	       28.99 ( 97.46%)
 length=1024, alignment=7:      1124.55	      267.87 ( 76.18%)	      259.16 ( 76.95%)	       64.96 ( 94.22%)	       33.05 ( 97.06%)	       30.91 ( 97.25%)
length=1024, alignment=10:      1459.58	      257.79 ( 82.34%)	      239.91 ( 83.56%)	       65.00 ( 95.55%)	       33.10 ( 97.73%)	       30.33 ( 97.92%)
 length=682, alignment=10:       732.89	      163.67 ( 77.67%)	      170.54 ( 76.73%)	       46.48 ( 93.66%)	       24.32 ( 96.68%)	       21.44 ( 97.07%)
 length=2048, alignment=0:      2141.96	      451.61 ( 78.92%)	      448.00 ( 79.08%)	      133.24 ( 93.78%)	       61.22 ( 97.14%)	       80.08 ( 96.26%)
 length=2048, alignment=7:      2145.05	      458.26 ( 78.64%)	      449.99 ( 79.02%)	      140.19 ( 93.46%)	       60.26 ( 97.19%)	       51.71 ( 97.59%)
length=2048, alignment=11:      2162.61	      463.37 ( 78.57%)	      448.07 ( 79.28%)	      140.29 ( 93.51%)	       59.51 ( 97.25%)	       51.59 ( 97.61%)
length=1365, alignment=11:      1439.74	      322.86 ( 77.58%)	      310.84 ( 78.41%)	      116.08 ( 91.94%)	       42.43 ( 97.05%)	       36.15 ( 97.49%)
 length=4096, alignment=0:      4278.68	      871.60 ( 79.63%)	      865.25 ( 79.78%)	      252.50 ( 94.10%)	      161.17 ( 96.23%)	       94.97 ( 97.78%)
 length=4096, alignment=7:      4253.01	      871.62 ( 79.51%)	      864.21 ( 79.68%)	      243.90 ( 94.27%)	      171.17 ( 95.98%)	       95.14 ( 97.76%)
length=4096, alignment=12:      4252.18	      879.66 ( 79.31%)	      863.68 ( 79.69%)	      244.26 ( 94.26%)	      185.36 ( 95.64%)	       93.61 ( 97.80%)
length=2730, alignment=12:      2868.22	      597.65 ( 79.16%)	      586.22 ( 79.56%)	      175.09 ( 93.90%)	      120.35 ( 95.80%)	      101.35 ( 96.47%)
    length=0, alignment=0:         4.87	        8.11 (-66.73%)	        6.49 (-33.34%)	        5.80 (-19.26%)	        5.68 (-16.67%)	        6.86 (-40.91%)
   length=32, alignment=0:        33.82	       22.36 ( 33.89%)	       17.03 ( 49.66%)	        7.30 ( 78.42%)	        5.68 ( 83.22%)	        7.50 ( 77.83%)
   length=64, alignment=0:        66.20	       26.76 ( 59.58%)	       23.22 ( 64.93%)	       12.99 ( 80.37%)	        7.34 ( 88.92%)	        8.44 ( 87.25%)
   length=96, alignment=0:       130.26	       31.62 ( 75.72%)	       30.00 ( 76.97%)	       11.39 ( 91.26%)	       10.54 ( 91.91%)	        8.68 ( 93.34%)
  length=128, alignment=0:       164.66	       39.05 ( 76.29%)	       35.68 ( 78.33%)	       13.07 ( 92.07%)	       12.97 ( 92.12%)	        9.59 ( 94.18%)
  length=160, alignment=0:       196.63	       45.18 ( 77.02%)	       42.16 ( 78.56%)	       14.65 ( 92.55%)	       10.87 ( 94.47%)	        9.31 ( 95.27%)
  length=192, alignment=0:       225.50	       52.71 ( 76.63%)	       49.61 ( 78.00%)	       16.22 ( 92.81%)	       11.36 ( 94.96%)	       11.08 ( 95.09%)
  length=224, alignment=0:       261.08	       57.57 ( 77.95%)	       55.82 ( 78.62%)	       17.84 ( 93.17%)	       12.16 ( 95.34%)	       11.51 ( 95.59%)
  length=256, alignment=0:       295.13	       65.56 ( 77.79%)	       62.59 ( 78.79%)	       19.46 ( 93.41%)	       13.12 ( 95.56%)	       12.33 ( 95.82%)
  length=288, alignment=0:       325.69	       72.16 ( 77.84%)	       69.20 ( 78.75%)	       21.08 ( 93.53%)	       13.94 ( 95.72%)	       12.32 ( 96.22%)
  length=320, alignment=0:       364.18	       78.78 ( 78.37%)	       75.69 ( 79.21%)	       22.71 ( 93.77%)	       14.70 ( 95.96%)	       14.46 ( 96.03%)
  length=352, alignment=0:       391.40	       84.87 ( 78.32%)	       82.15 ( 79.01%)	       24.50 ( 93.74%)	       15.62 ( 96.01%)	       14.27 ( 96.35%)
  length=384, alignment=0:       428.50	       91.43 ( 78.66%)	       88.70 ( 79.30%)	       26.16 ( 93.90%)	       17.29 ( 95.97%)	       15.04 ( 96.49%)
  length=416, alignment=0:       457.30	       98.23 ( 78.52%)	       95.02 ( 79.22%)	       27.81 ( 93.92%)	       17.22 ( 96.23%)	       15.05 ( 96.71%)
  length=448, alignment=0:       488.38	      104.52 ( 78.60%)	      101.87 ( 79.14%)	       31.22 ( 93.61%)	       18.07 ( 96.30%)	       16.89 ( 96.54%)
  length=480, alignment=0:       526.44	      109.61 ( 79.18%)	      108.11 ( 79.46%)	       31.11 ( 94.09%)	       18.88 ( 96.41%)	       17.10 ( 96.75%)
  length=512, alignment=0:       556.50	      117.29 ( 78.92%)	      113.78 ( 79.56%)	       62.57 ( 88.76%)	       19.88 ( 96.43%)	       17.80 ( 96.80%)
  length=576, alignment=0:       622.17	      152.93 ( 75.42%)	      127.58 ( 79.49%)	       39.34 ( 93.68%)	       21.31 ( 96.58%)	       19.99 ( 96.79%)
  length=640, alignment=0:       691.01	      142.56 ( 79.37%)	      161.78 ( 76.59%)	       39.20 ( 94.33%)	       22.98 ( 96.67%)	       20.13 ( 97.09%)
  length=704, alignment=0:       756.90	      156.31 ( 79.35%)	      176.19 ( 76.72%)	       45.03 ( 94.05%)	       24.82 ( 96.72%)	       22.33 ( 97.05%)
  length=768, alignment=0:       826.23	      193.17 ( 76.62%)	      188.41 ( 77.20%)	       50.81 ( 93.85%)	       27.46 ( 96.68%)	       23.25 ( 97.19%)
  length=832, alignment=0:       890.17	      204.81 ( 76.99%)	      201.61 ( 77.35%)	       53.77 ( 93.96%)	       27.73 ( 96.88%)	       25.06 ( 97.18%)
  length=896, alignment=0:       959.52	      217.89 ( 77.29%)	      213.86 ( 77.71%)	       57.99 ( 93.96%)	       29.53 ( 96.92%)	       26.29 ( 97.26%)
  length=960, alignment=0:      1024.52	      231.06 ( 77.45%)	      227.05 ( 77.84%)	       60.36 ( 94.11%)	       32.29 ( 96.85%)	       27.94 ( 97.27%)
 length=1024, alignment=0:      1086.71	      244.17 ( 77.53%)	      239.87 ( 77.93%)	       64.72 ( 94.04%)	       72.38 ( 93.34%)	       28.72 ( 97.36%)
 length=1152, alignment=0:      1231.48	      270.22 ( 78.06%)	      266.47 ( 78.36%)	       73.38 ( 94.04%)	       40.24 ( 96.73%)	       32.42 ( 97.37%)
 length=1280, alignment=0:      1349.29	      295.45 ( 78.10%)	      292.69 ( 78.31%)	      111.80 ( 91.71%)	       42.44 ( 96.85%)	       34.59 ( 97.44%)
 length=1408, alignment=0:      1487.13	      322.57 ( 78.31%)	      318.18 ( 78.60%)	       84.47 ( 94.32%)	       44.35 ( 97.02%)	       37.31 ( 97.49%)
 length=1536, alignment=0:      1623.52	      347.98 ( 78.57%)	      344.24 ( 78.80%)	      108.31 ( 93.33%)	       49.82 ( 96.93%)	       39.94 ( 97.54%)
 length=1664, alignment=0:      1748.88	      373.80 ( 78.63%)	      370.03 ( 78.84%)	      118.76 ( 93.21%)	       52.89 ( 96.98%)	       42.93 ( 97.55%)
 length=1792, alignment=0:      1886.22	      399.59 ( 78.82%)	      397.39 ( 78.93%)	      127.32 ( 93.25%)	       53.64 ( 97.16%)	       45.39 ( 97.59%)
 length=1920, alignment=0:      2018.37	      425.98 ( 78.89%)	      422.31 ( 79.08%)	      126.70 ( 93.72%)	       57.08 ( 97.17%)	       48.12 ( 97.62%)
 length=2048, alignment=0:      2167.09	      451.70 ( 79.16%)	      447.70 ( 79.34%)	      141.68 ( 93.46%)	       61.63 ( 97.16%)	       79.06 ( 96.35%)
 length=2304, alignment=0:      2422.03	      503.63 ( 79.21%)	      502.23 ( 79.26%)	      149.62 ( 93.82%)	       73.10 ( 96.98%)	       56.97 ( 97.65%)
 length=2560, alignment=0:      2678.68	      556.84 ( 79.21%)	      553.24 ( 79.35%)	      161.06 ( 93.99%)	      127.74 ( 95.23%)	       58.81 ( 97.80%)
 length=2816, alignment=0:      2941.95	      608.70 ( 79.31%)	      604.03 ( 79.47%)	      171.85 ( 94.16%)	       87.11 ( 97.04%)	       67.08 ( 97.72%)
 length=3072, alignment=0:      3229.89	      660.14 ( 79.56%)	      659.19 ( 79.59%)	      183.85 ( 94.31%)	      140.25 ( 95.66%)	       73.01 ( 97.74%)
 length=3328, alignment=0:      3496.08	      713.05 ( 79.60%)	      710.00 ( 79.69%)	      209.72 ( 94.00%)	      138.78 ( 96.03%)	       77.81 ( 97.77%)
 length=3584, alignment=0:      3756.52	      766.19 ( 79.60%)	      763.94 ( 79.66%)	      214.16 ( 94.30%)	      146.36 ( 96.10%)	       83.43 ( 97.78%)
 length=3840, alignment=0:      4017.15	      817.43 ( 79.65%)	      819.77 ( 79.59%)	      242.07 ( 93.97%)	      164.56 ( 95.90%)	       89.72 ( 97.77%)
 length=4096, alignment=0:      4281.59	      867.87 ( 79.73%)	      864.71 ( 79.80%)	      243.33 ( 94.32%)	      173.11 ( 95.96%)	       95.65 ( 97.77%)
 length=4608, alignment=0:      4810.30	      977.80 ( 79.67%)	      985.03 ( 79.52%)	      271.13 ( 94.36%)	      190.62 ( 96.04%)	      107.82 ( 97.76%)
 length=5120, alignment=0:      5380.16	     1075.77 ( 80.00%)	     1071.80 ( 80.08%)	      294.27 ( 94.53%)	      206.04 ( 96.17%)	      141.90 ( 97.36%)
 length=5632, alignment=0:      5925.70	     1195.61 ( 79.82%)	     1193.68 ( 79.86%)	      323.42 ( 94.54%)	      223.55 ( 96.23%)	      125.28 ( 97.89%)
 length=6144, alignment=0:      6402.20	     1285.52 ( 79.92%)	     1281.04 ( 79.99%)	      342.68 ( 94.65%)	      234.84 ( 96.33%)	      167.01 ( 97.39%)
 length=6656, alignment=0:      6997.01	     1387.32 ( 80.17%)	     1384.21 ( 80.22%)	      365.93 ( 94.77%)	      269.89 ( 96.14%)	      176.40 ( 97.48%)
 length=7168, alignment=0:      7454.76	     1492.10 ( 79.98%)	     1488.45 ( 80.03%)	      391.92 ( 94.74%)	      280.81 ( 96.23%)	      187.73 ( 97.48%)
 length=7680, alignment=0:      8163.34	     1608.43 ( 80.30%)	     1615.98 ( 80.20%)	      460.03 ( 94.36%)	      299.86 ( 96.33%)	      201.40 ( 97.53%)
---
 libc/src/string/inline_strlen.h               |  38 ++++++
 .../memory_utils/aarch64/inline_strlen.h      |  50 ++++++++
 .../memory_utils/x86_64/inline_strlen.h       | 116 ++++++++++++++++++
 libc/src/string/string_utils.h                |  15 ++-
 .../llvm-project-overlay/libc/BUILD.bazel     |   3 +
 .../libc/libc_configure_options.bzl           |   2 +-
 6 files changed, 222 insertions(+), 2 deletions(-)
 create mode 100644 libc/src/string/inline_strlen.h
 create mode 100644 libc/src/string/memory_utils/aarch64/inline_strlen.h
 create mode 100644 libc/src/string/memory_utils/x86_64/inline_strlen.h

diff --git a/libc/src/string/inline_strlen.h b/libc/src/string/inline_strlen.h
new file mode 100644
index 0000000000000..7ea27cf6e1813
--- /dev/null
+++ b/libc/src/string/inline_strlen.h
@@ -0,0 +1,38 @@
+//===-- Strlen implementation -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRLEN_H
+
+#include "src/__support/macros/attributes.h"               // LIBC_INLINE
+#include "src/__support/macros/properties/architectures.h" // LIBC_TARGET_ARCH_IS_
+
+#include <stddef.h> // size_t
+
+#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/inline_strlen.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_x86
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/inline_memcpy.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_aarch64
+#else
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_wide_read
+#endif
+
+namespace LIBC_NAMESPACE_DECL {
+
+[[gnu::flatten]] LIBC_INLINE void
+inline_memcpy(void *__restrict dst, const void *__restrict src, size_t count) {
+  LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY(reinterpret_cast<Ptr>(dst),
+                                      reinterpret_cast<CPtr>(src), count);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_INLINE_STRLEN_H
diff --git a/libc/src/string/memory_utils/aarch64/inline_strlen.h b/libc/src/string/memory_utils/aarch64/inline_strlen.h
new file mode 100644
index 0000000000000..2b9f226d14187
--- /dev/null
+++ b/libc/src/string/memory_utils/aarch64/inline_strlen.h
@@ -0,0 +1,50 @@
+//===-- Strlen implementation for aarch64 ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
+
+#include <arm_neon.h>
+#include <stddef.h> // size_t
+
+namespace LIBC_NAMESPACE_DECL {
+
+size_t string_length_neon(const char* src) {
+  using Vector __attribute__((may_alias)) = uint8x8_t;
+  uintptr_t misalign_bytes = reinterpret_case<uintptr_t>(src) % sizeof(Vector);
+  Vector *block_ptr = reinterpret_cast<Vector *>(src - misalign_bytes);
+  if (misalign_bytes) {
+    Vector v = *block_ptr;
+    Vector vcmp = vceqz_u8(v);
+    uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
+    uint64_t cmp = vget_lane_u64(cmp_mask, 0);
+    cmp = cmp >> (misalign_bytes << 3);
+    if (cmp) return __builtin_ctzl(cmp) >> 3;
+    ++block_ptr;
+  }
+  while (true) {
+    Vector v = *block_ptr;
+    Vector vcmp = vceqz_u8(v);
+    uint64x1_t cmp_mask = vreinterpret_u64_s8(vcmp);
+    uint64_t cmp = vget_lane_u64(cmp_mask, 0);
+    if (cmp)
+      return static_cast<size_t>(reinterpret_case<uintptr_t>(block_ptr) -
+                                 reinterpret_case<uintptr_t>(src) +
+                                 (__builtin_ctzl(cmp) >> 3));
+    block_ptr++;
+  }
+}
+
+template <typename T>
+[[maybe_unused]] LIBC_INLINE void string_length_aarch64(const char *src) {
+  return inline_string_length_neon(src);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_AARCH64_INLINE_STRLEN_H
diff --git a/libc/src/string/memory_utils/x86_64/inline_strlen.h b/libc/src/string/memory_utils/x86_64/inline_strlen.h
new file mode 100644
index 0000000000000..ffdf8938d62fd
--- /dev/null
+++ b/libc/src/string/memory_utils/x86_64/inline_strlen.h
@@ -0,0 +1,116 @@
+//===-- Strlen implementation for x86_64 ----------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
+#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
+
+#include "src/string/memory_utils/op_x86.h"  // K_AVX
+
+#include <stddef.h> // size_t
+#include <x86intrin.h>
+namespace LIBC_NAMESPACE_DECL {
+
+#if defined(__SSE2__)
+[[maybe_unused]] LIBC_INLINE size_t string_length_sse2(const char *src) {
+  using Vector __attribute__((may_alias)) = __m128i;
+  Vector z = _mm_setzero_si128();
+  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+  const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes);
+  if (misalign_bytes)
+    {
+      Vector v = _mm_load_si128 (block_ptr);
+      Vector vcmp = _mm_cmpeq_epi8 (z, v);
+      // shift away results in irrelevant bytes.
+      int cmp = _mm_movemask_epi8 (vcmp) >> misalign_bytes;
+      if (cmp)
+        return __builtin_ctz (cmp);
+      block_ptr++;
+    }
+  while (true)
+    {
+      Vector v = _mm_load_si128 (block_ptr);
+      Vector vcmp = _mm_cmpeq_epi8 (z, v);
+      int cmp = _mm_movemask_epi8 (vcmp);
+      if (cmp)
+        return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+                                   reinterpret_cast<uintptr_t>(src) +
+                                   __builtin_ctz(cmp));
+      block_ptr++;
+    }
+}
+#endif
+
+#if defined(__AVX2__)
+[[maybe_unused]] LIBC_INLINE size_t string_length_avx2(const char *src) {
+  using Vector __attribute__((may_alias)) = __mm256i;
+  Vector z = _mm256_setzero_si256();
+  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+  const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes);
+  if (misalign_bytes)
+    {
+      Vector v = _mm256_load_si256 (block_ptr);
+      Vector vcmp = _mm256_cmpeq_epi8 (z, v);
+      // shift away results in irrelevant bytes.
+      int cmp = _mm256_movemask_epi8 (vcmp) >> misalign_bytes;
+      if (cmp)
+        return __builtin_ctz(cmp);
+      block_ptr++;
+  }
+  while (true)
+    {
+      Vector v = _mm256_load_si256 (block_ptr);
+      Vector vcmp = _mm256_cmpeq_epi8 (z, v);
+      int cmp = _mm256_movemask_epi8 (vcmp);
+      if (cmp)
+        return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+                                   reinterpret_cast<uintptr_t>(src) +
+                                   __builtin_ctz(cmp));
+      block_ptr++;
+    }
+}
+#endif  // __AVX__
+
+#if defined(__AVX512F__)
+[[maybe_unused]] LIBC_INLINE size_t string_length_avx512(const char *src) {
+  using Vector __attribute__((may_alias)) = __mm512i;
+  Vector z = _mm512_setzero_si512();
+  uintptr_t misalign_bytes = reinterpret_cast<uintptr_t>(src) % sizeof(Vector);
+  const Vector *block_ptr = reinterpret_cast<const Vector *>(src - misalign_bytes);
+  if (misalign_bytes) {
+    Vector v = _mm512_load_si512(block_ptr);
+    __mmask64 cmp = _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ) >> misalign_bytes;
+    if (cmp)
+      return __builtin_ctzl(cmp);
+    block_ptr++;
+  }
+  while (true)
+    {
+    Vector v = _mm512_load_si512(block_ptr);
+    __mmask64 cmp = _mm512_cmp_epu8_mask(z, v, _MM_CMPINT_EQ);
+    if (cmp)
+      return static_cast<size_t>(reinterpret_cast<uintptr_t>(block_ptr) -
+                                 reinterpret_cast<uintptr_t>(src) +
+                                 __builtin_ctz(cmp));
+    block_ptr++;
+    }
+}
+#endif  // __AVX512F__
+
+template<typename T> LIBC_INLINE
+size_t string_length_x86_64(const char *src) {
+#if defined(__AVX512F__)
+  return string_length_avx512(src);
+#endif
+#if defined(__AVX__)
+  return string_length_avx2(src);
+#endif
+  return string_length_sse2(src);
+}
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_X86_64_INLINE_STRLEN_H
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 80e5783c7890b..8312ef895b243 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -22,6 +22,19 @@
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 
+
+#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
+#if defined(LIBC_TARGET_ARCH_IS_X86)
+#include "src/string/memory_utils/x86_64/inline_strlen.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_x86_64
+#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
+#include "src/string/memory_utils/aarch64/inline_strlen.h"
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_aarch64
+#else
+#define LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ string_length_wide_read
+#endif
+#endif
+
 namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
@@ -90,7 +103,7 @@ template <typename T> LIBC_INLINE size_t string_length(const T *src) {
   // be aligned to a word boundary, so it's the size we use for reading the
   // string a block at a time.
   if constexpr (cpp::is_same_v<T, char>)
-    return string_length_wide_read<unsigned int>(src);
+    return LIBC_SRC_STRING_MEMORY_UTILS_STRLEN_WIDE_READ<unsigned int>(src);
 #endif
   size_t length;
   for (length = 0; *src; ++src, ++length)
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index d530f7148ab96..21361c6a1a68f 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -4826,6 +4826,7 @@ libc_support_library(
         "src/string/memory_utils/aarch64/inline_memcpy.h",
         "src/string/memory_utils/aarch64/inline_memmove.h",
         "src/string/memory_utils/aarch64/inline_memset.h",
+        "src/string/memory_utils/aarch64/inline_strlen.h",
         "src/string/memory_utils/arm/common.h",
         "src/string/memory_utils/arm/inline_memcpy.h",
         "src/string/memory_utils/arm/inline_memset.h",
@@ -4850,6 +4851,7 @@ libc_support_library(
         "src/string/memory_utils/x86_64/inline_memcpy.h",
         "src/string/memory_utils/x86_64/inline_memmove.h",
         "src/string/memory_utils/x86_64/inline_memset.h",
+        "src/string/memory_utils/x86_64/inline_strlen.h",
     ],
     deps = [
         ":__support_common",
@@ -4874,6 +4876,7 @@ libc_support_library(
         ":__support_macros_optimization",
         ":hdr_limits_macros",
         ":llvm_libc_types_size_t",
+        ":string_memory_utils",
         ":types_size_t",
     ],
 )
diff --git a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
index b49e7c3fad954..209cd5ae80d29 100644
--- a/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
+++ b/utils/bazel/llvm-project-overlay/libc/libc_configure_options.bzl
@@ -39,7 +39,7 @@ LIBC_CONFIGURE_OPTIONS = [
     # "LIBC_COPT_SCANF_DISABLE_FLOAT",
     # "LIBC_COPT_SCANF_DISABLE_INDEX_MODE",
     "LIBC_COPT_STDIO_USE_SYSTEM_FILE",
-    # "LIBC_COPT_STRING_UNSAFE_WIDE_READ",
+    "LIBC_COPT_STRING_UNSAFE_WIDE_READ",
     # "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH",
     # "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE",
     # "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION",



More information about the llvm-commits mailing list