[llvm] 9e0ca57 - [X86] When expanding LCMPXCHG16B_SAVE_RBX, substitute RBX in base (#134109)

Thu Apr 3 06:56:56 PDT 2025

Author: Aaron Puchert
Date: 2025-04-03T15:56:53+02:00
New Revision: 9e0ca5720bee96f4b19eeb69a119b5eda3ab5528

URL: https://github.com/llvm/llvm-project/commit/9e0ca5720bee96f4b19eeb69a119b5eda3ab5528
DIFF: https://github.com/llvm/llvm-project/commit/9e0ca5720bee96f4b19eeb69a119b5eda3ab5528.diff

LOG: [X86] When expanding LCMPXCHG16B_SAVE_RBX, substitute RBX in base (#134109)

The pseudo-instruction LCMPXCHG16B_SAVE_RBX is used when RBX serves as
frame base pointer. At a very late stage it is then translated into a
regular LCMPXCHG16B, preceded by copying the actual argument into RBX,
and followed by restoring the register to the base pointer.

However, in case the `cmpxchg` operates on a local variable, RBX might
also be used as a base for the memory operand in frame finalization, and
we've overwritten RBX with the input operand for `cmpxchg16b`. So we
have to rewrite the memory operand base to use the saved value of RBX.

Fixes #119959.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ExpandPseudo.cpp
    llvm/test/CodeGen/X86/base-pointer-and-cmpxchg.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index c202f7fa93db6..398b738b85697 100644

--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -439,8 +439,18 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
     // Create the actual instruction.
     MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
-    // Copy the operands related to the address.
-    for (unsigned Idx = 1; Idx < 6; ++Idx)
+    // Copy the operands related to the address. If we access a frame variable,
+    // we need to replace the RBX base with SaveRbx, as RBX has another value.
+    const MachineOperand &Base = MBBI->getOperand(1);
+    if (Base.getReg() == X86::RBX || Base.getReg() == X86::EBX)
+      NewInstr->addOperand(MachineOperand::CreateReg(
+          Base.getReg() == X86::RBX
+              ? SaveRbx
+              : Register(TRI->getSubReg(SaveRbx, X86::sub_32bit)),
+          /*IsDef=*/false));
+    else
+      NewInstr->addOperand(Base);
+    for (unsigned Idx = 1 + 1; Idx < 1 + X86::AddrNumOperands; ++Idx)
       NewInstr->addOperand(MBBI->getOperand(Idx));
     // Finally, restore the value of RBX.
     TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,

diff  --git a/llvm/test/CodeGen/X86/base-pointer-and-cmpxchg.ll b/llvm/test/CodeGen/X86/base-pointer-and-cmpxchg.ll
index 498be7c9e1144..5e8da5818fe97 100644
--- a/llvm/test/CodeGen/X86/base-pointer-and-cmpxchg.ll
+++ b/llvm/test/CodeGen/X86/base-pointer-and-cmpxchg.ll
@@ -49,5 +49,39 @@ tail call void asm sideeffect "nop", "~{rax},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},
   store i32 %n, ptr %idx
   ret i1 %res
 }
+
+; If we compare-and-exchange a frame variable, we additionally need to rewrite
+; the memory operand to use the SAVE_rbx instead of rbx, which already contains
+; the input operand.
+;
+; CHECK-LABEL: cmp_and_swap16_frame:
+; Check that we actually use rbx.
+; gnux32 use the 32bit variant of the registers.
+; USE_BASE_64: movq %rsp, %rbx
+; USE_BASE_32: movl %esp, %ebx
+; Here we drop the inline assembly because the frame pointer is used anyway. So
+; rbx is not spilled to the stack but goes into a (hopefully numbered) register.
+; USE_BASE: movq %rbx, [[SAVE_rbx:%r[0-9]+]]
+;
+; USE_BASE: movq {{[^ ]+}}, %rbx
+; The use of the frame variable expands to N(%rbx) or N(%ebx). But we've just
+; overwritten that with the input operand. We need to use SAVE_rbx instead.
+; USE_BASE_64-NEXT: cmpxchg16b {{[0-9]*}}([[SAVE_rbx]])
+; USE_BASE_32-NEXT: cmpxchg16b {{[0-9]*}}([[SAVE_rbx]]d)
+; USE_BASE-NEXT: movq [[SAVE_rbx]], %rbx
+;
+; DONT_USE_BASE-NOT: movq %rsp, %rbx
+; DONT_USE_BASE-NOT: movl %esp, %ebx
+; DONT_USE_BASE: cmpxchg
+define i1 @cmp_and_swap16_frame(i128 %a, i128 %b, i32 %n) {
+  %local = alloca i128, align 16
+  %dummy = alloca i32, i32 %n
+  %cmp = cmpxchg ptr %local, i128 %a, i128 %b seq_cst seq_cst
+  %res = extractvalue { i128, i1 } %cmp, 1
+  %idx = getelementptr i32, ptr %dummy, i32 5
+  store i32 %n, ptr %idx
+  ret i1 %res
+}
+
 !llvm.module.flags = !{!0}
 !0 = !{i32 2, !"override-stack-alignment", i32 32}