[llvm-branch-commits] [llvm] release/22.x: [X86] Correctly call 16 byte atomic helpers on x86_64 Windows (#181356) (PR #181907)

Tue Feb 17 13:05:04 PST 2026

llvmbot wrote:




@llvm/pr-subscribers-backend-x86

Author: None (llvmbot)

<details>
<summary>Changes</summary>

Backport 7679d02d6607a6f1e9908c2a9e1c5f6c9627f300

Requested by: @mstorsjo

---
Full diff: https://github.com/llvm/llvm-project/pull/181907.diff


2 Files Affected:

- (modified) llvm/lib/CodeGen/AtomicExpandPass.cpp (+10) 
- (added) llvm/test/CodeGen/X86/atomic-nocx16-win.ll (+38) 


``````````diff

diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index 6d05888709944..2da10ea444060 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1924,6 +1924,16 @@ bool AtomicExpandImpl::expandAtomicOpToLibcall(
   bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
 
+  if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
+      Size == 16) {
+    // x86_64 Windows treats i128 like an XMM vector: it is returned in XMM0
+    // and, as a parameter, passed indirectly. The generic lowering rules
+    // handle this correctly if we pass it as a v2i64 rather than an i128.
+    // This is what Clang does in the frontend for such types as well
+    // (see WinX86_64ABIInfo::classify in Clang).
+    SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
+  }
+
   const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
 
   // TODO: the "order" argument type is "int", not int32. So
diff --git a/llvm/test/CodeGen/X86/atomic-nocx16-win.ll b/llvm/test/CodeGen/X86/atomic-nocx16-win.ll
new file mode 100644
index 0000000000000..3f0649cd9ae07
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-nocx16-win.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-windows-gnu -verify-machineinstrs -mcpu=corei7 -mattr=-cx16 | FileCheck %s
+
+define void @call_load(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: call_load:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rsi
+; CHECK-NEXT:    subq $32, %rsp
+; CHECK-NEXT:    movq %rcx, %rsi
+; CHECK-NEXT:    movq %rdx, %rcx
+; CHECK-NEXT:    movl $5, %edx
+; CHECK-NEXT:    callq __atomic_load_16
+; CHECK-NEXT:    movaps %xmm0, (%rsi)
+; CHECK-NEXT:    addq $32, %rsp
+; CHECK-NEXT:    popq %rsi
+; CHECK-NEXT:    retq
+entry:
+  %0 = load atomic i128, ptr %src seq_cst, align 16
+  store i128 %0, ptr %dst, align 16
+  ret void
+}
+
+define void @call_store(ptr %dst, ptr %src) nounwind {
+; CHECK-LABEL: call_store:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $56, %rsp
+; CHECK-NEXT:    movaps (%rdx), %xmm0
+; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; CHECK-NEXT:    movl $5, %r8d
+; CHECK-NEXT:    callq __atomic_store_16
+; CHECK-NEXT:    addq $56, %rsp
+; CHECK-NEXT:    retq
+entry:
+  %0 = load i128, ptr %src, align 16
+  store atomic i128 %0, ptr %dst seq_cst, align 16
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/181907