[llvm] 0df1a52 - [AArch64][FastISel] Fallback on atomic stlr/cas with non-reg operands. (#133987)

via llvm-commits <llvm-commits@lists.llvm.org>
Thu May 8 14:29:27 PDT 2025


Author: Ahmed Bougacha
Date: 2025-05-08T14:29:24-07:00
New Revision: 0df1a52852f570fb72c25f88f94f9b51e4689f1d

URL: https://github.com/llvm/llvm-project/commit/0df1a52852f570fb72c25f88f94f9b51e4689f1d
DIFF: https://github.com/llvm/llvm-project/commit/0df1a52852f570fb72c25f88f94f9b51e4689f1d.diff

LOG: [AArch64][FastISel] Fallback on atomic stlr/cas with non-reg operands. (#133987)

This has been a latent bug for almost 10 years, but is relatively hard
to trigger, needing an address operand that isn't handled by
getRegForValue (in the test here, constexpr casts). When that happens,
it returns 0, which FastISel happily uses as a register operand, all the
way to asm, where we either get a crash on an invalid register, or a
silently corrupt instruction.

Unfortunately, FastISel is still enabled at -O0 for at least
ILP32/arm64_32.

Added: 
    llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64FastISel.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 5ef439f8224c1..5ddf83f45ac69 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -2190,6 +2190,8 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
     if (isReleaseOrStronger(Ord)) {
       // The STLR addressing mode only supports a base reg; pass that directly.
       Register AddrReg = getRegForValue(PtrV);
+      if (!AddrReg)
+        return false;
       return emitStoreRelease(VT, SrcReg, AddrReg,
                               createMachineMemOperandFor(I));
     }
@@ -5070,12 +5072,16 @@ bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
 
   const MCInstrDesc &II = TII.get(Opc);
 
-  const Register AddrReg = constrainOperandRegClass(
-      II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
-  const Register DesiredReg = constrainOperandRegClass(
-      II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
-  const Register NewReg = constrainOperandRegClass(
-      II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
+  Register AddrReg = getRegForValue(I->getPointerOperand());
+  Register DesiredReg = getRegForValue(I->getCompareOperand());
+  Register NewReg = getRegForValue(I->getNewValOperand());
+
+  if (!AddrReg || !DesiredReg || !NewReg)
+    return false;
+
+  AddrReg = constrainOperandRegClass(II, AddrReg, II.getNumDefs());
+  DesiredReg = constrainOperandRegClass(II, DesiredReg, II.getNumDefs() + 1);
+  NewReg = constrainOperandRegClass(II, NewReg, II.getNumDefs() + 2);
 
   const Register ResultReg1 = createResultReg(ResRC);
   const Register ResultReg2 = createResultReg(&AArch64::GPR32RegClass);

diff  --git a/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll b/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll
new file mode 100644
index 0000000000000..16ef0cbc8a810
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-atomic-fallback.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm64_32-apple-darwin -O0 -fast-isel -verify-machineinstrs \
+; RUN:   -aarch64-enable-atomic-cfg-tidy=0 -aarch64-enable-collect-loh=0 \
+; RUN:   < %s | FileCheck %s
+
+; FastISel doesn't support cstexprs as operands here, but make
+; sure it knows to fallback, at least.
+
+define void @atomic_store_cstexpr_addr(i32 %val) #0 {
+; CHECK-LABEL: atomic_store_cstexpr_addr:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    adrp x8, _g@PAGE
+; CHECK-NEXT:    add x8, x8, _g@PAGEOFF
+; CHECK-NEXT:    ; kill: def $w1 killed $w8 killed $x8
+; CHECK-NEXT:    adrp x8, _g@PAGE
+; CHECK-NEXT:    add x8, x8, _g@PAGEOFF
+; CHECK-NEXT:    stlr w0, [x8]
+; CHECK-NEXT:    ret
+  store atomic i32 %val, ptr inttoptr (i32 ptrtoint (ptr @g to i32) to ptr) release, align 4
+  ret void
+}
+
+define i32 @cmpxchg_cstexpr_addr(i32 %cmp, i32 %new, ptr %ps) #0 {
+; CHECK-LABEL: cmpxchg_cstexpr_addr:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    mov w8, w0
+; CHECK-NEXT:    adrp x10, _g@PAGE
+; CHECK-NEXT:    add x10, x10, _g@PAGEOFF
+; CHECK-NEXT:  LBB1_1: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    ldaxr w0, [x10]
+; CHECK-NEXT:    cmp w0, w8
+; CHECK-NEXT:    b.ne LBB1_3
+; CHECK-NEXT:  ; %bb.2: ; in Loop: Header=BB1_1 Depth=1
+; CHECK-NEXT:    stlxr w9, w1, [x10]
+; CHECK-NEXT:    cbnz w9, LBB1_1
+; CHECK-NEXT:  LBB1_3:
+; CHECK-NEXT:    subs w8, w0, w8
+; CHECK-NEXT:    cset w8, eq
+; CHECK-NEXT:    ; kill: def $w1 killed $w8
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    ret
+  %tmp0 = cmpxchg ptr inttoptr (i32 ptrtoint (ptr @g to i32) to ptr), i32 %cmp, i32 %new seq_cst seq_cst
+  %tmp1 = extractvalue { i32, i1 } %tmp0, 0
+  %tmp2 = extractvalue { i32, i1 } %tmp0, 1
+  %tmp3 = zext i1 %tmp2 to i32
+  store i32 %tmp3, ptr %ps
+  ret i32 %tmp1
+}
+
+@g = global i32 0


        


More information about the llvm-commits mailing list