<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href=https://github.com/llvm/llvm-project/issues/58685>58685</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            std::atomic::fetch_xxx emit LOCK CMPXCHG instead of simpler LOCK instructions
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          dr-m
      </td>
    </tr>
</table>

<pre>
    The code generation for several std::atomic::fetch_xxx operations is suboptimal on IA-32 and AMD64. This is related to #36905 and #36373 and possibly other tickets. I verified this by compiling the following with `clang++-15 -c -O2` or `clang++-15 -c -O2 -m32 -march=i686`:
```c++
#include <atomic>
#include <cstdint>

// "void" functions generate the minimal IA-32 or AMD64 code
// (each of these discards the value returned by the fetch_xxx call).

// Atomically adds b to a; the previous value returned by fetch_add is unused.
void lock_add(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_add(b); }
// Atomically subtracts b from a; the previous value returned by fetch_sub is unused.
void lock_sub(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_sub(b); }
// Atomically ORs b into a; the previous value returned by fetch_or is unused.
void lock_or(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_or(b); }
// Atomically ANDs a with b; the previous value returned by fetch_and is unused.
void lock_and(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_and(b); }
// Atomically XORs b into a; the previous value returned by fetch_xor is unused.
void lock_xor(std::atomic<uint32_t> &a, uint32_t b) { a.fetch_xor(b); }
// Atomically increments a by the constant 1; the previous value is unused.
// clang++-15: "lock inc"; g++-12: "lock add"
void lock_inc(std::atomic<uint32_t> &a) { a.fetch_add(1); }
// Atomically decrements a by the constant 1; the previous value is unused.
// clang++-15: "lock dec"; g++-12: "lock sub"
void lock_dec(std::atomic<uint32_t> &a) { a.fetch_sub(1); }

// Atomically adds b to a and returns b plus the previous value of a,
// i.e. the sum that the fetch_add stored.
// "lock add" degrades to lock xadd; add
uint32_t lock_add_result(std::atomic<uint32_t> &a, uint32_t b)
{
  return b + a.fetch_add(b);
}

// Atomically subtracts b from a and returns the previous value of a minus b,
// i.e. the difference that the fetch_sub stored.
// "lock sub" degrades to neg; lock xadd; sub
uint32_t lock_sub_result(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_sub(b) - b;
}

// Atomically ORs b into a and returns (previous value | b),
// i.e. the value that the fetch_or stored.
// "lock or" degrades to lock cmpxchg
uint32_t lock_or_or(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_or(b) | b;
}

// Atomically ORs b into a and returns (previous value & ~b),
// i.e. the previous value with every bit that is set in b cleared.
// "lock or; and" degrades to lock cmpxchg
uint32_t lock_or_andneg(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_or(b) & ~b;
}

// Atomically ANDs a with b and returns (previous value & b),
// i.e. the value that the fetch_and stored.
// "lock and" degrades to lock cmpxchg
uint32_t lock_and_and(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_and(b) & b;
}

// Atomically ANDs a with b and returns (previous value | ~b),
// i.e. the previous value with every bit that is clear in b set.
// "lock and; or" degrades to lock cmpxchg
uint32_t lock_and_orneg(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_and(b) | ~b;
}

// Atomically XORs b into a and returns (previous value | b),
// i.e. the previous value with every bit that is set in b also set.
// "lock xor; or" degrades to lock cmpxchg
uint32_t lock_xor_or(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_xor(b) | b;
}

// Atomically XORs b into a and returns (previous value & ~b),
// i.e. the previous value with every bit that is set in b cleared.
// "lock xor; and" degrades to lock cmpxchg
uint32_t lock_xor_andneg(std::atomic<uint32_t> &a, uint32_t b)
{
  return a.fetch_xor(b) & ~b;
}
```
</pre>
<img width="1px" height="1px" alt="" src="http://email.email.llvm.org/o/eJy1V0tzmzAQ_jVw2bEHhAFz4JDYfU3baQ899JYRQga1AnkkkdJ_35Ww0zwcJ_bEM5oIabWPb1_eVKr-W_5oOTBVc2h4zzW1QvWwURoMv8WjBGPrILnCRa3qBJu-N9yy9gbUdsdhQBgwQ6W2VnTIhDI-Xc0SArSv4errOlvM4Ucr_DPNJbW8BqsgIEmSFVHqn_lDkif-sFXGiEr-BWVbrsEK9ptbM4dPgFaJjXD8Tt4fYVsIsohJ2jcBucY1i1OYMZh9I3gPiOQ5Msy6xP2hmrVBshbZMvMcGxTN0QdSqj8C2RBxtA6iKyROi02SdrckET2TA3owSFZ7J73bEXdP3uNCgORWiRo32Aw9m_y28zr3OjvRe_dNvkPTvet8eCZJjh-kYr9vaI2Clk-CsxpEbxNyY9EEVJjRgKxgfwdVQAoI8mug8ymCkxR3HSTXSFk_VoMxfQM1k5QjapR-Ay1eyBElmFdv4bL-BZeNbwJmPIxml0oP8xm1uORy2gFzET8dzx2d3Kf7iJPHNnuuV9l8KH_ik62s-XErfcI8sdJznWHllH5PrXxUnPf8gwY2mtbcuC7l70dHQG63eb670O3L8UZzM0h7Xux3tuS7pgLYJe2ge6jw-TPlumc5hmby4wM0PW8cjgeg3LMDoPD6IqCetgWYIcPrILm6OBAf1m1H1jaHYCh9dn95CcFdkWJWrU6B4DKpP5xpx5Egl4vgpdGQDBG9ey2gM6Agy_kN-SUg_3u0R3IKDAzM6RnmwCh9sbjch4NpdkJgxinVTkc0XrBoxvOqZjy7bMYL1834qsLZT48hL-Msy6NFXJAirMukLpKChlZYycsj0_Y4jsA7YeHLt9VnWH39_nP18QP-3hvLae2mViO6rcRh2dPdvR6mITMctCxba7fGyfNebXBuHqo5Ux0epLzdb7OtVr84w4b_XhgzcIMf6TJbpmFbkjTfUJ7EKYlIHtcFW9ZkwTYsXrJNnlVVKGnFpSmD9DpI16Eo8R2JI1LEcUyiYp7m6Nw8W8Qsxz2tgkXEOyrk3CmeK92EuvQ2VENjkCiFwZH_jkjxP4Km53wvnw62Vbqs9awLva2lN_QfvtnAMg">