<table border="1" cellspacing="0" cellpadding="8">
    <tr>
        <th>Issue</th>
        <td>
            <a href="https://github.com/llvm/llvm-project/issues/119959">119959</a>
        </td>
    </tr>

    <tr>
        <th>Summary</th>
        <td>
            [llvm] cmpxchg16b uses pointer from overwritten rbx
        </td>
    </tr>

    <tr>
      <th>Labels</th>
      <td>
            new issue
      </td>
    </tr>

    <tr>
      <th>Assignees</th>
      <td>
      </td>
    </tr>

    <tr>
      <th>Reporter</th>
      <td>
          vasama
      </td>
    </tr>
</table>

<pre>
    Reduced IR:

```ll
target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "amd64-pc-windows-msvc19.41.34123"

%struct.anon = type { [2 x %"struct.(anonymous namespace)::mt_shared_object"], %"class.vsm::atomic_intrusive_ptr", [48 x i8], %"struct.std::atomic.3", [56 x i8], %"struct.std::atomic_flag", [60 x i8] }
%"struct.(anonymous namespace)::mt_shared_object" = type { %"class.vsm::detail::basic_intrusive_refcount", %"struct.std::atomic_flag", ptr, [40 x i8] }
%"class.vsm::detail::basic_intrusive_refcount" = type { %"struct.vsm::detail::intrusive_refcount_base" }
%"struct.vsm::detail::intrusive_refcount_base" = type { %"class.vsm::atomic" }
%"class.vsm::atomic" = type { i64 }
%"class.vsm::atomic_intrusive_ptr" = type { %"class.vsm::atomic.2" }
%"class.vsm::atomic.2" = type { %"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" }
%"struct.vsm::atomic_intrusive_ptr<(anonymous namespace)::mt_shared_object>::atom" = type { ptr, i64 }
%"struct.std::atomic.3" = type { %"struct.std::_Atomic_integral_facade.4" }
%"struct.std::_Atomic_integral_facade.4" = type { %"struct.std::_Atomic_integral.5" }
%"struct.std::_Atomic_integral.5" = type { %"struct.std::_Atomic_storage.6" }
%"struct.std::_Atomic_storage.6" = type { %"struct.std::_Atomic_padded.7" }
%"struct.std::_Atomic_padded.7" = type { i64 }
%"struct.std::atomic_flag" = type { %"struct.std::atomic" }
%"struct.std::atomic" = type { %"struct.std::_Atomic_integral_facade" }
%"struct.std::_Atomic_integral_facade" = type { %"struct.std::_Atomic_integral" }
%"struct.std::_Atomic_integral" = type { %"struct.std::_Atomic_storage" }
%"struct.std::_Atomic_storage" = type { %"struct.std::_Atomic_padded" }
%"struct.std::_Atomic_padded" = type { i32 }

define fastcc void @"?test_case@?A0x7E1854EA@@YAXXZ"() #0 personality ptr @__CxxFrameHandler3 {
  %1 = alloca [0 x [0 x %struct.anon]], i32 0, align 64
  %2 = cmpxchg ptr %1, i128 0, i128 0 monotonic monotonic, align 16
  invoke void @"?_Throw_Cpp_error@std@@YAXH@Z"(i32 0)
          to label %3 unwind label %4

3:                                                ; preds = %0
 unreachable

4:                                                ; preds = %0
  %5 = cleanuppad within none []
  ret void
}

declare i32 @__CxxFrameHandler3(...)

declare void @"?_Throw_Cpp_error@std@@YAXH@Z"()

; uselistorder directives
uselistorder i32 0, { 1, 0 }

attributes #0 = { "target-cpu"="nehalem" }
```

Here is the resulting object code:
(`clang-19 -cc1 -emit-obj -triple "amd64-pc-windows-msvc19.41.34123" -O3 reduced.ll -o - | llvm-objdump-19 -M intel -d -`)
```asm
0000000000000000 <?test_case@?A0x7E1854EA@@YAXXZ>:
       0: 55 push    rbp
       1: 53 push    rbx
       2: 48 83 ec 68                   sub     rsp, 0x68
 6: 48 8d 6c 24 60                lea     rbp, [rsp + 0x60]
       b: 48 83 e4 c0                   and     rsp, -0x40
       f: 48 89 e3 mov     rbx, rsp
      12: 48 89 6b 58                   mov qword ptr [rbx + 0x58], rbp
      16: 48 c7 45 00 fe ff ff ff       mov qword ptr [rbp], -0x2
      1e: 49 89 d8                      mov     r8, rbx
      21: 45 31 c9                      xor     r9d, r9d
      24: 31 c0                         xor     eax, eax
      26: 31 d2 xor     edx, edx
      28: 31 c9                         xor     ecx, ecx
      2a: 4c 89 cb                      mov     rbx, r9
      2d: f0 lock
      2e: 48 0f c7 4b 40 cmpxchg16b      xmmword ptr [rbx + 0x40]
      33: 4c 89 c3 mov     rbx, r8
      36: 31 c9                         xor     ecx, ecx
      38: e8 00 00 00 00                call    0x3d <?test_case@?A0x7E1854EA@@YAXXZ+0x3d>
      3d: cc int3
      3e: 66 90                         nop
```

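Note that `mov     rbx, r9` is immediately followed by `cmpxchg16b      xmmword ptr [rbx + 0x40]`: the memory operand is addressed through `rbx` right after `rbx` has been overwritten to set up `cmpxchg16b`, which also uses `rbx` as an input register (the low half of the replacement value in `rcx:rbx`). The instruction therefore dereferences the wrong pointer.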

The original unreduced input produces slightly different object code but has the same problem:
```asm
00007FF786689459  lea         r8,[rbx+100h] 
00007FF786689460  mov         rax,qword ptr [rbx+140h]  
00007FF786689467 mov         rdx,qword ptr [rbx+148h]  
00007FF78668946E  nop 
00007FF786689470  mov         r9,rbx  
00007FF786689473  xor ecx,ecx  
00007FF786689475  mov         rbx,r8  
00007FF786689478  lock cmpxchg16b oword ptr [rbx+140h]
```

</pre>
<img width="1" height="1" alt="" src="http://email.email.llvm.org/o/eJy0WE2TozjS_jWqSwYOIT4MBx9c1e3o9_DuRszOYXYvDiEJmx6BWEnYrn-_IQmM7XJ1uWp2CcJgyOfJD6UyJagxza4TYoWyZ5R9e6KD3Su9OlBDW_pUKf66-k3wgQkO__cbStYI-zPH4ZQS4bWleicscGqppK9qsICSb4AIEVGLkvUx6skSo2SdEP_j_sbXf91dnvqfqDnfxKRAyTomRVQXeLzr_KN8gudp9A8nRshsh9VNL8VkA215nkY9i45Nx9XRRK05sLhcpPEiSWOSBKg7SWasHphd0E51Hm5fewFo-QwoeyZwAkQyRMgohUjhBF9bNRjoaCtMT5lApHRRStat3Zo91YJvVfVTMOv0ZN8QeRlZmKTGLA6mDeLUqrZh26azejDNQWx7qx3EyWfPaQEnaIorgtEMY_klwyKZUVn-KGpbS7qbgTmegICW30Js_orjN8G85z8XljYy3FfUXIVCi5qpobOTgQ864iMYwnffn6-ZcM-Z0Zi7VG9JthU1wjO9De6nKT6MbAjMrbr3hS4Jmzz9GPUmaR82akEeMmsU-zjsd-1JXj6bsMn3me6jYfpfqbx0dkzk28H4RQH4RbDO4tv12XSx01Rua8ooF4v0HZcfBH5e8SL7rMYR8bgqY5WmO7HIH9V0DXhcUU85F3yxfFTPlfyvZt6vy90jJt6vA7-Q-nIKfTGBvqj0s9q-ljqfTJwvpc3nkuZtyiRnOF5zUTedgJoayxgcVMMBpdh1xmRjhbFb5hpIilGyWePT8ntcZOn3tXuQ4n-u__jjX76JFoiUgEiCoRfaqI7Kxr66euTIttuX02mjaSt-0I5LoRNnBsJrcN7G3jgqpWLU9WDXgqfL1UrLLU_CCsU5gN0Nlc2ugzyduIKjrO1PbL8L6kkWe0hMioAJd9CqTlnVNWy-mxnj3DM23UH9KW6Csv19r9Vx-9L3W6G10ijFLvBTQH6gFI8xGc0sPdd0WAWSVkI6yxIYOrfanJ-kYVASlFxAHjtQ8gy9FtyMC9oMO71DpwVle1pJEajT_xq1u8lCwKWg3dD3lMOxsfumg051AsJOwYtqYX0YnQ1z5jFJtQjpeC9LECkWi0UI4IX8V4fjTOTcGYyQjZuDXGjgjRbMNgdhEF5fvTlnmps3PpHwPHWotbqpBitMSH0fHD-FSdhhRKwfvJXfECGd2FMpLtcJ094osP0QLhYG7F6AFmaQtul2ENo-MMXFuKkiBcoxk7TbRXEJEWMxRKJtbKSqnxBNe5qH9jMQ_T0BHXZtCykhUhABWr6AlIfW8fGh7b2W_wdXFSVEHCJncQjl5AA1LcJrfHOAW948WkTCuuY8UdwuDrIM-sHs3X9d9fPL2L9MLl6e5pduswdpAUUCgkFe3ElnM1T-qk3vR_SUFw6fT0gOOQOSQo5vkVJQmMwJOwZtekDk2ZHgKdn9UV3YkQJ7QwUAtOOXdkT4lOKZoJ4IShAJtOoAk6_kxUMmyZjMknkF2T2XHfzfR6V5KIrZs65Oo93ZtO-7jHE8BYMtIc0AY6gF1PV4vsvZj1QRPpGZS3iu0tnH7xk3cnn3imDJPKDEj3aaQRIDK--jT0oHdMk9vOQz3Nc7h703AtdwQX1s3eUMz0c4J7MYD2L8QqyYtLxj4aUWFuDsAk69j8xFiFUfRGhMgLmrENf1ocYgFftzfirGEcS1H8QKUjx1xjgflZza9n5SpFfJnCQX9r3NxWIWzP9SHBIfRlG4fDufNwejUrorPiX8UxWGPDuIKzRndT5wjLnalsxPfeDyHMr3M6ZT_dsC_jdlBaAcvx2pHEOtpFRHwaF6dTKfHIgcw3Hv-gPKsaPNsesUgxEcaG2Fhj09uG7xczAWKiE6UAehj7qxVnRQK-27Sj_oXhlhQNXXNgT-hu0dpYHGAjVAO2i6frCgxa4xVuhFcPP3vQClm13T
UemXF-GjX5DttXJ_DRjZ7PZWvgJv6lpo0dnLVgbVYGFPQ7MztBUOWLnumKzvN5blZrMs8rwo06yc6zBMJSPEDJHnGOO9_3xzC3PFfBoZD_OT_bYmOoY0MNyhWF4z8HcZivcYvvvceftieWtdiciLy4I7okmYQ2H-CHZXJruh86moi3uiBfjCcVkb1LtRucn6J75KeJmU9Ems4mWSxnmBMX7arwpO07ImcYLLLCmqgvBYYMaqOqkFr9LqqVkRTNKYxCkuCcZ4UZK44HFeYYazLCsoSrFoaSMXbi2yUHr31BgziFUcl2VWPvkVs_Efo93S6gj-bfh2-qRXfgFTDTuDUuxWc2amsY2V_iu2e-KG6cJxn_69cmsdDbVW7dU80tXpadBytbe2Ny5TyQaRza6x-6FaMNUisvGc4RL1WoXPmhtvm0FkMxp_WJH_BAAA___71ofI">