[libc-commits] [libc] [libc] Speed up memmove overlapping check (PR #70017)

Dmitry Vyukov via libc-commits libc-commits at lists.llvm.org
Tue Oct 24 06:02:59 PDT 2023


https://github.com/dvyukov updated https://github.com/llvm/llvm-project/pull/70017

>From 81d8231c88878ecfa685eb4085ff2d2993764425 Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov at google.com>
Date: Tue, 24 Oct 2023 10:52:00 +0200
Subject: [PATCH] [libc] Speed up memmove overlapping check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use a check that requires fewer instructions and is cheaper.
Current code:

   1b704:       48 39 f7                cmp    %rsi,%rdi
   1b707:       48 89 f0                mov    %rsi,%rax
   1b70a:       48 0f 47 c7             cmova  %rdi,%rax
   1b70e:       48 89 f9                mov    %rdi,%rcx
   1b711:       48 0f 47 ce             cmova  %rsi,%rcx
   1b715:       48 01 d1                add    %rdx,%rcx
   1b718:       48 39 c1                cmp    %rax,%rcx

New code:

   1b704:       48 89 f8                mov    %rdi,%rax
   1b707:       48 29 f0                sub    %rsi,%rax
   1b70a:       48 89 c1                mov    %rax,%rcx
   1b70d:       48 f7 d9                neg    %rcx
   1b710:       48 0f 48 c8             cmovs  %rax,%rcx
   1b714:       48 39 d1                cmp    %rdx,%rcx

                 │  baseline   │              disjoint              │
                 │   sec/op    │   sec/op     vs base               │
memmove/Google_A   3.910n ± 0%   3.861n ± 1%  -1.26% (p=0.000 n=50)

            │  baseline   │              disjoint               │
            │   sec/op    │   sec/op     vs base                │
memmove/1     2.724n ± 3%   2.441n ± 0%  -10.37% (n=50)
memmove/2     2.878n ± 0%   2.713n ± 0%   -5.73% (n=50)
memmove/3     2.835n ± 0%   2.593n ± 0%   -8.54% (n=50)
memmove/4     3.032n ± 0%   2.776n ± 0%   -8.45% (p=0.000 n=50)
memmove/5     2.833n ± 0%   2.600n ± 0%   -8.20% (p=0.000 n=50)
memmove/6     2.758n ± 0%   2.744n ± 0%   -0.52% (p=0.000 n=50)
memmove/7     2.762n ± 0%   2.744n ± 0%   -0.63% (p=0.000 n=50)
memmove/8     2.763n ± 0%   2.750n ± 0%   -0.46% (p=0.000 n=50)
memmove/9     3.182n ± 0%   3.269n ± 0%   +2.75% (p=0.000 n=50)
memmove/10    3.185n ± 0%   3.270n ± 0%   +2.64% (p=0.000 n=50)
memmove/11    3.188n ± 0%   3.277n ± 0%   +2.79% (p=0.000 n=50)
memmove/12    3.190n ± 0%   3.279n ± 0%   +2.82% (p=0.000 n=50)
memmove/13    3.194n ± 0%   3.281n ± 0%   +2.73% (p=0.000 n=50)
memmove/14    3.197n ± 0%   3.285n ± 0%   +2.77% (p=0.000 n=50)
memmove/15    3.198n ± 0%   3.282n ± 0%   +2.62% (p=0.000 n=50)
memmove/16    3.201n ± 0%   3.284n ± 0%   +2.61% (p=0.000 n=50)
memmove/17    3.564n ± 0%   3.320n ± 0%   -6.86% (p=0.000 n=50)
memmove/18    3.572n ± 0%   3.313n ± 0%   -7.25% (p=0.000 n=50)
memmove/19    3.572n ± 0%   3.325n ± 0%   -6.94% (p=0.000 n=50)
memmove/20    3.575n ± 0%   3.319n ± 0%   -7.15% (p=0.000 n=50)
memmove/21    3.578n ± 0%   3.327n ± 0%   -7.03% (p=0.000 n=50)
memmove/22    3.581n ± 0%   3.330n ± 0%   -7.01% (p=0.000 n=50)
memmove/23    3.582n ± 0%   3.354n ± 1%   -6.37% (p=0.000 n=50)
memmove/24    3.587n ± 0%   3.347n ± 1%   -6.71% (p=0.000 n=50)
memmove/25    3.591n ± 0%   3.320n ± 0%   -7.55% (p=0.000 n=50)
memmove/26    3.593n ± 0%   3.348n ± 0%   -6.82% (p=0.000 n=50)
memmove/27    3.596n ± 0%   3.346n ± 0%   -6.94% (p=0.000 n=50)
memmove/28    3.597n ± 0%   3.357n ± 0%   -6.67% (p=0.000 n=50)
memmove/29    3.601n ± 0%   3.340n ± 0%   -7.23% (p=0.000 n=50)
memmove/30    3.602n ± 0%   3.345n ± 0%   -7.12% (p=0.000 n=50)
memmove/31    3.608n ± 0%   3.357n ± 0%   -6.94% (p=0.000 n=50)
memmove/32    3.605n ± 0%   3.352n ± 0%   -7.01% (p=0.000 n=50)
memmove/33    4.128n ± 1%   3.829n ± 0%   -7.23% (p=0.000 n=50)
memmove/34    4.149n ± 0%   3.836n ± 0%   -7.54% (p=0.000 n=50)
memmove/35    4.134n ± 0%   3.839n ± 0%   -7.15% (n=50)
memmove/36    4.151n ± 0%   3.842n ± 0%   -7.45% (n=50)
memmove/37    4.152n ± 0%   3.841n ± 0%   -7.49% (p=0.000 n=50)
memmove/38    4.159n ± 0%   3.844n ± 0%   -7.58% (p=0.000 n=50)
memmove/39    4.165n ± 0%   3.841n ± 0%   -7.78% (p=0.000 n=50)
memmove/40    4.162n ± 0%   3.837n ± 0%   -7.81% (p=0.000 n=50)
memmove/41    4.161n ± 0%   3.845n ± 0%   -7.58% (p=0.000 n=50)
memmove/42    4.164n ± 0%   3.851n ± 0%   -7.53% (p=0.000 n=50)
memmove/43    4.165n ± 0%   3.843n ± 0%   -7.74% (p=0.000 n=50)
memmove/44    4.175n ± 0%   3.847n ± 0%   -7.83% (p=0.000 n=50)
memmove/45    4.170n ± 0%   3.849n ± 0%   -7.70% (p=0.000 n=50)
memmove/46    4.175n ± 0%   3.850n ± 0%   -7.79% (p=0.000 n=50)
memmove/47    4.180n ± 0%   3.851n ± 0%   -7.87% (p=0.000 n=50)
memmove/48    4.178n ± 0%   3.852n ± 0%   -7.81% (p=0.000 n=50)
memmove/49    4.175n ± 0%   3.851n ± 0%   -7.76% (n=50)
memmove/50    4.178n ± 0%   3.855n ± 0%   -7.73% (p=0.000 n=50)
memmove/51    4.190n ± 0%   3.859n ± 0%   -7.91% (p=0.000 n=50)
memmove/52    4.188n ± 0%   3.859n ± 0%   -7.84% (p=0.000 n=50)
memmove/53    4.191n ± 0%   3.863n ± 0%   -7.82% (p=0.000 n=50)
memmove/54    4.192n ± 0%   3.860n ± 0%   -7.91% (p=0.000 n=50)
memmove/55    4.192n ± 0%   3.869n ± 0%   -7.70% (p=0.000 n=50)
memmove/56    4.204n ± 0%   3.866n ± 0%   -8.05% (p=0.000 n=50)
memmove/57    4.198n ± 0%   3.864n ± 0%   -7.95% (p=0.000 n=50)
memmove/58    4.202n ± 0%   3.865n ± 0%   -8.02% (p=0.000 n=50)
memmove/59    4.208n ± 0%   3.868n ± 0%   -8.09% (p=0.000 n=50)
memmove/60    4.205n ± 0%   3.873n ± 0%   -7.89% (p=0.000 n=50)
memmove/61    4.212n ± 0%   3.872n ± 0%   -8.08% (p=0.000 n=50)
memmove/62    4.214n ± 0%   3.870n ± 0%   -8.16% (p=0.000 n=50)
memmove/63    4.215n ± 0%   3.877n ± 0%   -8.02% (p=0.000 n=50)
memmove/64    4.217n ± 0%   3.881n ± 0%   -7.99% (p=0.000 n=50)
memmove/65    4.990n ± 0%   4.683n ± 0%   -6.15% (p=0.000 n=50)
memmove/66    5.022n ± 0%   4.719n ± 0%   -6.03% (p=0.000 n=50)
memmove/67    5.030n ± 0%   4.725n ± 0%   -6.07% (p=0.000 n=50)
memmove/68    5.035n ± 0%   4.724n ± 0%   -6.18% (p=0.000 n=50)
memmove/69    5.030n ± 0%   4.725n ± 0%   -6.07% (p=0.000 n=50)
memmove/70    5.040n ± 0%   4.728n ± 0%   -6.19% (p=0.000 n=50)
memmove/71    5.053n ± 0%   4.728n ± 0%   -6.43% (p=0.000 n=50)
memmove/72    5.050n ± 0%   4.732n ± 0%   -6.29% (p=0.000 n=50)
memmove/73    5.049n ± 0%   4.733n ± 0%   -6.24% (p=0.000 n=50)
memmove/74    5.054n ± 0%   4.734n ± 0%   -6.34% (p=0.000 n=50)
memmove/75    5.063n ± 0%   4.736n ± 0%   -6.46% (p=0.000 n=50)
memmove/76    5.046n ± 0%   4.741n ± 0%   -6.04% (p=0.000 n=50)
memmove/77    5.057n ± 0%   4.741n ± 0%   -6.25% (p=0.000 n=50)
memmove/78    5.077n ± 0%   4.739n ± 0%   -6.65% (p=0.000 n=50)
memmove/79    5.074n ± 0%   4.746n ± 0%   -6.46% (p=0.000 n=50)
memmove/80    5.085n ± 0%   4.747n ± 0%   -6.65% (p=0.000 n=50)
memmove/81    5.077n ± 0%   4.735n ± 0%   -6.74% (p=0.000 n=50)
memmove/82    5.087n ± 0%   4.747n ± 0%   -6.68% (p=0.000 n=50)
memmove/83    5.087n ± 0%   4.754n ± 0%   -6.56% (p=0.000 n=50)
memmove/84    5.096n ± 0%   4.753n ± 0%   -6.73% (p=0.000 n=50)
memmove/85    5.082n ± 0%   4.749n ± 0%   -6.55% (p=0.000 n=50)
memmove/86    5.103n ± 0%   4.752n ± 0%   -6.87% (p=0.000 n=50)
memmove/87    5.096n ± 0%   4.760n ± 0%   -6.61% (p=0.000 n=50)
memmove/88    5.099n ± 0%   4.765n ± 0%   -6.55% (p=0.000 n=50)
memmove/89    5.104n ± 0%   4.757n ± 0%   -6.79% (p=0.000 n=50)
memmove/90    5.117n ± 0%   4.767n ± 0%   -6.84% (p=0.000 n=50)
memmove/91    5.100n ± 0%   4.766n ± 0%   -6.54% (p=0.000 n=50)
memmove/92    5.103n ± 0%   4.763n ± 0%   -6.67% (p=0.000 n=50)
memmove/93    5.115n ± 0%   4.772n ± 0%   -6.71% (p=0.000 n=50)
memmove/94    5.117n ± 0%   4.769n ± 0%   -6.80% (p=0.000 n=50)
memmove/95    5.131n ± 0%   4.775n ± 0%   -6.94% (p=0.000 n=50)
memmove/96    5.129n ± 0%   4.772n ± 0%   -6.97% (p=0.000 n=50)
memmove/97    5.130n ± 0%   4.764n ± 0%   -7.13% (p=0.000 n=50)
memmove/98    5.134n ± 0%   4.780n ± 0%   -6.89% (p=0.000 n=50)
memmove/99    5.141n ± 0%   4.780n ± 0%   -7.03% (p=0.000 n=50)
memmove/100   5.141n ± 0%   4.780n ± 0%   -7.02% (p=0.000 n=50)
memmove/101   5.150n ± 0%   4.782n ± 0%   -7.14% (p=0.000 n=50)
memmove/102   5.150n ± 0%   4.790n ± 0%   -6.99% (p=0.000 n=50)
memmove/103   5.156n ± 0%   4.788n ± 0%   -7.14% (n=50)
memmove/104   5.157n ± 0%   4.793n ± 0%   -7.05% (p=0.000 n=50)
memmove/105   5.147n ± 0%   4.791n ± 0%   -6.90% (p=0.000 n=50)
memmove/106   5.167n ± 0%   4.793n ± 0%   -7.23% (p=0.000 n=50)
memmove/107   5.165n ± 0%   4.801n ± 0%   -7.06% (p=0.000 n=50)
memmove/108   5.173n ± 0%   4.800n ± 0%   -7.21% (p=0.000 n=50)
memmove/109   5.173n ± 0%   4.797n ± 0%   -7.27% (p=0.000 n=50)
memmove/110   5.171n ± 0%   4.808n ± 0%   -7.01% (p=0.000 n=50)
memmove/111   5.180n ± 0%   4.799n ± 0%   -7.36% (p=0.000 n=50)
memmove/112   5.185n ± 0%   4.812n ± 0%   -7.19% (p=0.000 n=50)
memmove/113   5.187n ± 0%   4.797n ± 0%   -7.53% (p=0.000 n=50)
memmove/114   5.183n ± 0%   4.809n ± 0%   -7.21% (n=50)
memmove/115   5.193n ± 0%   4.811n ± 0%   -7.36% (p=0.000 n=50)
memmove/116   5.196n ± 0%   4.815n ± 0%   -7.32% (p=0.000 n=50)
memmove/117   5.199n ± 0%   4.816n ± 0%   -7.37% (p=0.000 n=50)
memmove/118   5.198n ± 0%   4.811n ± 0%   -7.45% (p=0.000 n=50)
memmove/119   5.203n ± 0%   4.818n ± 0%   -7.40% (p=0.000 n=50)
memmove/120   5.195n ± 0%   4.823n ± 0%   -7.16% (p=0.000 n=50)
memmove/121   5.203n ± 0%   4.812n ± 0%   -7.51% (p=0.000 n=50)
memmove/122   5.204n ± 0%   4.818n ± 0%   -7.42% (n=50)
memmove/123   5.202n ± 0%   4.822n ± 0%   -7.31% (p=0.000 n=50)
memmove/124   5.216n ± 0%   4.823n ± 0%   -7.54% (p=0.000 n=50)
memmove/125   5.227n ± 0%   4.823n ± 0%   -7.72% (p=0.000 n=50)
memmove/126   5.235n ± 0%   4.830n ± 0%   -7.74% (p=0.000 n=50)
memmove/127   5.237n ± 0%   4.833n ± 0%   -7.72% (p=0.000 n=50)
memmove/128   5.241n ± 0%   4.832n ± 0%   -7.81% (p=0.000 n=50)
memmove/129   6.460n ± 0%   5.858n ± 0%   -9.31% (p=0.000 n=50)
memmove/130   7.539n ± 0%   6.634n ± 0%  -12.00% (p=0.000 n=50)
memmove/131   7.542n ± 0%   6.623n ± 0%  -12.18% (p=0.000 n=50)
memmove/132   7.527n ± 0%   6.667n ± 1%  -11.43% (p=0.000 n=50)
memmove/133   7.521n ± 0%   6.631n ± 0%  -11.83% (p=0.000 n=50)
memmove/134   7.531n ± 0%   6.642n ± 0%  -11.81% (p=0.000 n=50)
memmove/135   7.541n ± 0%   6.692n ± 1%  -11.25% (p=0.000 n=50)
memmove/136   7.549n ± 0%   6.657n ± 0%  -11.81% (p=0.000 n=50)
memmove/137   7.544n ± 0%   6.646n ± 0%  -11.90% (p=0.000 n=50)
memmove/138   7.557n ± 0%   6.673n ± 1%  -11.70% (p=0.000 n=50)
memmove/139   7.545n ± 0%   6.654n ± 0%  -11.81% (n=50)
memmove/140   7.559n ± 0%   6.680n ± 1%  -11.63% (p=0.000 n=50)
memmove/141   7.560n ± 0%   6.664n ± 0%  -11.85% (p=0.000 n=50)
memmove/142   7.556n ± 0%   6.679n ± 0%  -11.62% (p=0.000 n=50)
memmove/143   7.570n ± 0%   6.683n ± 1%  -11.71% (p=0.000 n=50)
memmove/144   7.586n ± 0%   6.683n ± 0%  -11.91% (p=0.000 n=50)
memmove/145   7.593n ± 0%   6.665n ± 0%  -12.22% (p=0.000 n=50)
memmove/146   7.591n ± 0%   6.665n ± 0%  -12.20% (p=0.000 n=50)
memmove/147   7.598n ± 0%   6.665n ± 0%  -12.27% (p=0.000 n=50)
memmove/148   7.598n ± 0%   6.670n ± 0%  -12.21% (p=0.000 n=50)
memmove/149   7.593n ± 0%   6.691n ± 0%  -11.88% (p=0.000 n=50)
memmove/150   7.625n ± 0%   6.713n ± 1%  -11.97% (p=0.000 n=50)
memmove/151   7.603n ± 0%   6.710n ± 1%  -11.74% (p=0.000 n=50)
memmove/152   7.613n ± 0%   6.701n ± 1%  -11.97% (p=0.000 n=50)
memmove/153   7.595n ± 0%   6.710n ± 0%  -11.65% (p=0.000 n=50)
memmove/154   7.614n ± 0%   6.721n ± 0%  -11.74% (p=0.000 n=50)
memmove/155   7.615n ± 0%   6.709n ± 0%  -11.89% (p=0.000 n=50)
memmove/156   7.613n ± 0%   6.693n ± 0%  -12.08% (p=0.000 n=50)
memmove/157   7.628n ± 0%   6.708n ± 0%  -12.05% (p=0.000 n=50)
memmove/158   7.629n ± 0%   6.706n ± 0%  -12.10% (p=0.000 n=50)
memmove/159   7.639n ± 0%   6.724n ± 0%  -11.98% (p=0.000 n=50)
memmove/160   7.619n ± 0%   6.702n ± 0%  -12.04% (p=0.000 n=50)
memmove/161   7.653n ± 0%   6.698n ± 0%  -12.49% (p=0.000 n=50)
memmove/162   8.104n ± 0%   7.140n ± 1%  -11.89% (p=0.000 n=50)
memmove/163   8.141n ± 0%   7.187n ± 1%  -11.72% (p=0.000 n=50)
memmove/164   8.154n ± 0%   7.107n ± 0%  -12.84% (p=0.000 n=50)
memmove/165   8.143n ± 0%   7.117n ± 0%  -12.59% (p=0.000 n=50)
memmove/166   8.176n ± 0%   7.110n ± 0%  -13.04% (p=0.000 n=50)
memmove/167   8.194n ± 0%   7.168n ± 1%  -12.52% (p=0.000 n=50)
memmove/168   8.214n ± 0%   7.188n ± 1%  -12.50% (p=0.000 n=50)
memmove/169   8.220n ± 0%   7.242n ± 1%  -11.90% (p=0.000 n=50)
memmove/170   8.228n ± 0%   7.244n ± 1%  -11.96% (p=0.000 n=50)
memmove/171   8.263n ± 0%   7.184n ± 0%  -13.06% (p=0.000 n=50)
memmove/172   8.259n ± 0%   7.325n ± 1%  -11.31% (p=0.000 n=50)
memmove/173   8.271n ± 0%   7.225n ± 0%  -12.65% (p=0.000 n=50)
memmove/174   8.284n ± 0%   7.287n ± 1%  -12.04% (p=0.000 n=50)
memmove/175   8.289n ± 0%   7.282n ± 1%  -12.15% (p=0.000 n=50)
memmove/176   8.309n ± 0%   7.328n ± 1%  -11.81% (p=0.000 n=50)
memmove/177   8.317n ± 0%   7.264n ± 1%  -12.67% (p=0.000 n=50)
memmove/178   8.302n ± 0%   7.342n ± 1%  -11.57% (p=0.000 n=50)
memmove/179   8.309n ± 0%   7.357n ± 1%  -11.45% (p=0.000 n=50)
memmove/180   8.304n ± 0%   7.318n ± 1%  -11.87% (p=0.000 n=50)
memmove/181   8.312n ± 0%   7.363n ± 1%  -11.42% (p=0.000 n=50)
memmove/182   8.315n ± 0%   7.320n ± 1%  -11.96% (p=0.000 n=50)
memmove/183   8.330n ± 0%   7.286n ± 1%  -12.53% (p=0.000 n=50)
memmove/184   8.310n ± 0%   7.324n ± 1%  -11.86% (p=0.000 n=50)
memmove/185   8.303n ± 0%   7.267n ± 1%  -12.47% (p=0.000 n=50)
memmove/186   8.287n ± 0%   7.312n ± 1%  -11.76% (p=0.000 n=50)
memmove/187   8.298n ± 0%   7.395n ± 2%  -10.88% (p=0.000 n=50)
memmove/188   8.296n ± 0%   7.339n ± 1%  -11.54% (p=0.000 n=50)
memmove/189   8.306n ± 0%   7.299n ± 1%  -12.12% (p=0.000 n=50)
memmove/190   8.281n ± 0%   7.309n ± 1%  -11.74% (p=0.000 n=50)
memmove/191   8.299n ± 0%   7.282n ± 1%  -12.26% (p=0.000 n=50)
memmove/192   8.281n ± 0%   7.335n ± 1%  -11.41% (p=0.000 n=50)
memmove/193   8.299n ± 0%   7.325n ± 1%  -11.74% (p=0.000 n=50)
memmove/194   8.641n ± 0%   8.034n ± 0%   -7.02% (p=0.000 n=50)
memmove/195   8.667n ± 0%   8.073n ± 0%   -6.85% (p=0.000 n=50)
memmove/196   8.666n ± 0%   8.030n ± 0%   -7.34% (p=0.000 n=50)
memmove/197   8.660n ± 0%   8.096n ± 1%   -6.51% (p=0.000 n=50)
memmove/198   8.688n ± 0%   8.047n ± 0%   -7.39% (p=0.000 n=50)
memmove/199   8.678n ± 0%   8.061n ± 0%   -7.11% (p=0.000 n=50)
memmove/200   8.669n ± 0%   8.034n ± 0%   -7.32% (p=0.000 n=50)
memmove/201   8.692n ± 0%   8.061n ± 0%   -7.26% (p=0.000 n=50)
memmove/202   8.668n ± 0%   8.060n ± 0%   -7.02% (p=0.000 n=50)
memmove/203   8.687n ± 0%   8.066n ± 0%   -7.15% (p=0.000 n=50)
memmove/204   8.699n ± 0%   8.076n ± 0%   -7.16% (p=0.000 n=50)
memmove/205   8.676n ± 0%   8.085n ± 0%   -6.82% (p=0.000 n=50)
memmove/206   8.684n ± 0%   8.101n ± 1%   -6.71% (p=0.000 n=50)
memmove/207   8.725n ± 0%   8.099n ± 0%   -7.18% (p=0.000 n=50)
memmove/208   8.674n ± 0%   8.073n ± 0%   -6.92% (p=0.000 n=50)
memmove/209   8.697n ± 0%   8.088n ± 0%   -7.01% (p=0.000 n=50)
memmove/210   8.733n ± 0%   8.076n ± 0%   -7.53% (p=0.000 n=50)
memmove/211   8.732n ± 0%   8.104n ± 0%   -7.19% (p=0.000 n=50)
memmove/212   8.730n ± 0%   8.091n ± 0%   -7.32% (p=0.000 n=50)
memmove/213   8.728n ± 0%   8.100n ± 0%   -7.19% (p=0.000 n=50)
memmove/214   8.744n ± 1%   8.081n ± 1%   -7.57% (p=0.000 n=50)
memmove/215   8.734n ± 0%   8.150n ± 0%   -6.68% (p=0.000 n=50)
memmove/216   8.748n ± 0%   8.116n ± 0%   -7.23% (p=0.000 n=50)
memmove/217   8.751n ± 0%   8.129n ± 1%   -7.11% (p=0.000 n=50)
memmove/218   8.747n ± 0%   8.114n ± 0%   -7.23% (p=0.000 n=50)
memmove/219   8.733n ± 0%   8.159n ± 0%   -6.57% (p=0.000 n=50)
memmove/220   8.764n ± 0%   8.145n ± 0%   -7.06% (p=0.000 n=50)
memmove/221   8.764n ± 0%   8.142n ± 0%   -7.10% (p=0.000 n=50)
memmove/222   8.775n ± 0%   8.152n ± 0%   -7.10% (p=0.000 n=50)
memmove/223   8.771n ± 0%   8.143n ± 0%   -7.16% (p=0.000 n=50)
memmove/224   8.778n ± 0%   8.175n ± 1%   -6.87% (p=0.000 n=50)
memmove/225   8.794n ± 0%   8.138n ± 0%   -7.45% (p=0.000 n=50)
memmove/226   10.13n ± 0%   10.06n ± 0%   -0.71% (p=0.000 n=50)
memmove/227   10.14n ± 0%   10.08n ± 0%   -0.53% (p=0.000 n=50)
memmove/228   10.13n ± 0%   10.08n ± 0%   -0.56% (p=0.000 n=50)
memmove/229   10.17n ± 0%   10.11n ± 0%   -0.56% (p=0.000 n=50)
memmove/230   10.17n ± 0%   10.13n ± 0%   -0.38% (p=0.003 n=50)
memmove/231   10.16n ± 0%   10.12n ± 0%   -0.41% (p=0.001 n=50)
memmove/232   10.19n ± 0%   10.12n ± 0%   -0.67% (p=0.000 n=50)
memmove/233   10.21n ± 0%   10.14n ± 0%   -0.71% (p=0.000 n=50)
memmove/234   10.24n ± 0%   10.16n ± 0%   -0.79% (p=0.000 n=50)
memmove/235   10.24n ± 0%   10.16n ± 0%   -0.76% (p=0.000 n=50)
memmove/236   10.25n ± 0%   10.16n ± 0%   -0.81% (p=0.000 n=50)
memmove/237   10.24n ± 0%   10.17n ± 0%   -0.69% (p=0.000 n=50)
memmove/238   10.27n ± 0%   10.19n ± 0%   -0.79% (p=0.000 n=50)
memmove/239   10.29n ± 0%   10.19n ± 0%   -0.90% (p=0.000 n=50)
memmove/240   10.30n ± 0%   10.20n ± 0%   -0.95% (p=0.000 n=50)
memmove/241   10.29n ± 0%   10.20n ± 0%   -0.91% (p=0.000 n=50)
memmove/242   10.30n ± 0%   10.22n ± 0%   -0.80% (p=0.000 n=50)
memmove/243   10.32n ± 0%   10.23n ± 0%   -0.87% (p=0.000 n=50)
memmove/244   10.32n ± 0%   10.24n ± 0%   -0.74% (p=0.000 n=50)
memmove/245   10.33n ± 0%   10.23n ± 0%   -0.97% (p=0.000 n=50)
memmove/246   10.33n ± 0%   10.24n ± 0%   -0.92% (p=0.000 n=50)
memmove/247   10.31n ± 0%   10.24n ± 0%   -0.69% (p=0.000 n=50)
memmove/248   10.32n ± 0%   10.26n ± 0%   -0.55% (p=0.000 n=50)
memmove/249   10.33n ± 0%   10.28n ± 0%   -0.52% (p=0.000 n=50)
memmove/250   10.34n ± 0%   10.27n ± 0%   -0.66% (p=0.000 n=50)
memmove/251   10.32n ± 0%   10.27n ± 0%   -0.45% (p=0.000 n=50)
memmove/252   10.34n ± 0%   10.30n ± 0%   -0.39% (p=0.005 n=50)
memmove/253   10.33n ± 0%   10.27n ± 0%   -0.57% (p=0.000 n=50)
memmove/254   10.33n ± 0%   10.27n ± 0%   -0.54% (p=0.000 n=50)
memmove/255   10.34n ± 0%   10.29n ± 0%   -0.50% (p=0.002 n=50)
memmove/256   10.36n ± 0%   10.31n ± 0%   -0.44% (p=0.006 n=50)
memmove/257   10.33n ± 0%   10.29n ± 0%   -0.36% (p=0.004 n=50)
geomean       6.142n        5.696n        -7.26%
---
 libc/src/string/memory_utils/utils.h | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
index c467ae712b638a2..bd87169c051b750 100644
--- a/libc/src/string/memory_utils/utils.h
+++ b/libc/src/string/memory_utils/utils.h
@@ -89,16 +89,13 @@ template <size_t alignment, typename T> LIBC_INLINE T *assume_aligned(T *ptr) {
 // Returns true iff memory regions [p1, p1 + size] and [p2, p2 + size] are
 // disjoint.
 LIBC_INLINE bool is_disjoint(const void *p1, const void *p2, size_t size) {
-  const char *a = static_cast<const char *>(p1);
-  const char *b = static_cast<const char *>(p2);
-  if (a > b) {
-    // Swap a and b, this compiles down to conditionnal move for aarch64, x86
-    // and RISCV with zbb extension.
-    const char *tmp = a;
-    a = b;
-    b = tmp;
-  }
-  return a + size <= b;
+  const ptrdiff_t sdiff =
+      static_cast<const char *>(p1) - static_cast<const char *>(p2);
+  const size_t udiff = cpp::bit_cast<size_t>(sdiff);
+  // Integer promotion would be caught here.
+  const size_t neg_udiff = cpp::bit_cast<size_t>(-sdiff);
+  // This is expected to compile to a conditional move.
+  return sdiff >= 0 ? size <= udiff : size <= neg_udiff;
 }
 
 #if LIBC_HAS_BUILTIN(__builtin_memcpy_inline)



More information about the libc-commits mailing list