[llvm] 00e3721 - AArch64: do not use xzr for ldxp -> stxp dataflow.

Tim Northover via llvm-commits <llvm-commits@lists.llvm.org>
Wed Feb 9 04:29:24 PST 2022


Author: Tim Northover
Date: 2022-02-09T12:29:16Z
New Revision: 00e372137c8a604b59b995131cebf2d84f097544

URL: https://github.com/llvm/llvm-project/commit/00e372137c8a604b59b995131cebf2d84f097544
DIFF: https://github.com/llvm/llvm-project/commit/00e372137c8a604b59b995131cebf2d84f097544.diff

LOG: AArch64: do not use xzr for ldxp -> stxp dataflow.

If the result of a cmpxchg is unused, regalloc chooses `xzr` for the defs of
CMP_SWAP_128*. However, on the failure path the pseudo is expanded to an
LDXP -> STXP sequence that stores the original value back (to ensure no
tearing occurred). With `xzr` as a destination, this unintentionally nulls
out half of the value.

So instead use GPR64common for these defs, so that regalloc has to choose a
real register.
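
As an illustrative sketch (hand-written, not compiler output; the labels and
register numbers below are invented), the failure-path expansion before this
change could end up as:

    .Lretry:
        ldxp    xzr, x9, [x0]      // low half of the loaded value is discarded into xzr
        // ... compare against the desired value; on mismatch fall through ...
        stxp    w10, xzr, x9, [x0] // "store the original back" now writes 0 for the low half
        cbnz    w10, .Lretry

With GPR64common defs regalloc must pick real registers (x8/x9 in the updated
arm64-atomic-128.ll test below), so the store-back preserves the loaded value.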

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrAtomics.td
    llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir
    llvm/test/CodeGen/AArch64/arm64-atomic-128.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index b220929514f9d..7d62b9eba006c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -490,7 +490,8 @@ def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
 
 let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
     mayLoad = 1, mayStore = 1 in {
-class cmp_swap_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32common:$scratch),
+class cmp_swap_128 : Pseudo<(outs GPR64common:$RdLo, GPR64common:$RdHi,
+                                  GPR32common:$scratch),
                            (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
                                 GPR64:$newLo, GPR64:$newHi), []>,
                      Sched<[WriteAtomic]>;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir
index f051fe7c604b1..6a6e0b63b103a 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-cmpxchg-128.mir
@@ -30,35 +30,37 @@ body:             |
     ; CHECK: RET_ReallyLR
     ; CHECK-NOLSE-LABEL: name: compare_swap_128
     ; CHECK-NOLSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4
-    ; CHECK-NOLSE: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
-    ; CHECK-NOLSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK-NOLSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-    ; CHECK-NOLSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
-    ; CHECK-NOLSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-    ; CHECK-NOLSE: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64)
-    ; CHECK-NOLSE: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
-    ; CHECK-NOLSE: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
-    ; CHECK-NOLSE: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
-    ; CHECK-NOLSE: early-clobber %13:gpr64(s64), early-clobber %14:gpr64(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
-    ; CHECK-NOLSE: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
-    ; CHECK-NOLSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
-    ; CHECK-NOLSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
-    ; CHECK-NOLSE: RET_ReallyLR
+    ; CHECK-NOLSE-NEXT: {{  $}}
+    ; CHECK-NOLSE-NEXT: [[COPY:%[0-9]+]]:gpr64(p0) = COPY $x0
+    ; CHECK-NOLSE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-NOLSE-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-NOLSE-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-NOLSE-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
+    ; CHECK-NOLSE-NEXT: [[COPY5:%[0-9]+]]:gpr64(s64) = COPY [[COPY1]](s64)
+    ; CHECK-NOLSE-NEXT: [[COPY6:%[0-9]+]]:gpr64(s64) = COPY [[COPY2]](s64)
+    ; CHECK-NOLSE-NEXT: [[COPY7:%[0-9]+]]:gpr64(s64) = COPY [[COPY3]](s64)
+    ; CHECK-NOLSE-NEXT: [[COPY8:%[0-9]+]]:gpr64(s64) = COPY [[COPY4]](s64)
+    ; CHECK-NOLSE-NEXT: early-clobber %13:gpr64common(s64), early-clobber %14:gpr64common(s64), early-clobber %16:gpr32common = CMP_SWAP_128_ACQUIRE [[COPY]](p0), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64) :: (load store acquire acquire (s128))
+    ; CHECK-NOLSE-NEXT: [[COPY9:%[0-9]+]]:gpr64 = COPY %16
+    ; CHECK-NOLSE-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES %13(s64), %14(s64)
+    ; CHECK-NOLSE-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
+    ; CHECK-NOLSE-NEXT: RET_ReallyLR
     ; CHECK-LSE-LABEL: name: compare_swap_128
     ; CHECK-LSE: liveins: $x0_x1, $x1, $x0, $x1, $x2, $x3, $x4
-    ; CHECK-LSE: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0
-    ; CHECK-LSE: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
-    ; CHECK-LSE: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
-    ; CHECK-LSE: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
-    ; CHECK-LSE: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
-    ; CHECK-LSE: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64
-    ; CHECK-LSE: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64
-    ; CHECK-LSE: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128))
-    ; CHECK-LSE: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0
-    ; CHECK-LSE: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64
-    ; CHECK-LSE: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64)
-    ; CHECK-LSE: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
-    ; CHECK-LSE: RET_ReallyLR
+    ; CHECK-LSE-NEXT: {{  $}}
+    ; CHECK-LSE-NEXT: [[COPY:%[0-9]+]]:gpr64sp(p0) = COPY $x0
+    ; CHECK-LSE-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
+    ; CHECK-LSE-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
+    ; CHECK-LSE-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
+    ; CHECK-LSE-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
+    ; CHECK-LSE-NEXT: [[REG_SEQUENCE:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY1]](s64), %subreg.sube64, [[COPY2]](s64), %subreg.subo64
+    ; CHECK-LSE-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:xseqpairsclass(s128) = REG_SEQUENCE [[COPY3]](s64), %subreg.sube64, [[COPY4]](s64), %subreg.subo64
+    ; CHECK-LSE-NEXT: [[CASPAX:%[0-9]+]]:xseqpairsclass(s128) = CASPAX [[REG_SEQUENCE]](s128), [[REG_SEQUENCE1]](s128), [[COPY]](p0) :: (load store acquire acquire (s128))
+    ; CHECK-LSE-NEXT: [[EXTRACT:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 0
+    ; CHECK-LSE-NEXT: [[EXTRACT1:%[0-9]+]]:_(s64) = G_EXTRACT [[CASPAX]](s128), 64
+    ; CHECK-LSE-NEXT: [[MV:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[EXTRACT]](s64), [[EXTRACT1]](s64)
+    ; CHECK-LSE-NEXT: G_STORE [[MV]](s128), [[COPY]](p0) :: (store (s128))
+    ; CHECK-LSE-NEXT: RET_ReallyLR
     %0:_(p0) = COPY $x0
     %3:_(s64) = COPY $x1
     %4:_(s64) = COPY $x2

diff --git a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
index d6aabd1bb6911..2f8d06e133555 100644
--- a/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-atomic-128.ll
@@ -474,3 +474,52 @@ define void @atomic_store_relaxed(i128 %in, i128* %p) {
    store atomic i128 %in, i128* %p unordered, align 16
    ret void
 }
+
+; Since we store the original value to ensure no tearing for the unsuccessful
+; case, the register used must not be xzr.
+define void @cmpxchg_dead(i128* %ptr, i128 %desired, i128 %new) {
+; NOOUTLINE-LABEL: cmpxchg_dead:
+; NOOUTLINE:       // %bb.0:
+; NOOUTLINE-NEXT:  .LBB17_1: // =>This Inner Loop Header: Depth=1
+; NOOUTLINE-NEXT:    ldxp x8, x9, [x0]
+; NOOUTLINE-NEXT:    cmp x8, x2
+; NOOUTLINE-NEXT:    cset w10, ne
+; NOOUTLINE-NEXT:    cmp x9, x3
+; NOOUTLINE-NEXT:    cinc w10, w10, ne
+; NOOUTLINE-NEXT:    cbz w10, .LBB17_3
+; NOOUTLINE-NEXT:  // %bb.2: // in Loop: Header=BB17_1 Depth=1
+; NOOUTLINE-NEXT:    stxp w10, x8, x9, [x0]
+; NOOUTLINE-NEXT:    cbnz w10, .LBB17_1
+; NOOUTLINE-NEXT:    b .LBB17_4
+; NOOUTLINE-NEXT:  .LBB17_3: // in Loop: Header=BB17_1 Depth=1
+; NOOUTLINE-NEXT:    stxp w10, x4, x5, [x0]
+; NOOUTLINE-NEXT:    cbnz w10, .LBB17_1
+; NOOUTLINE-NEXT:  .LBB17_4:
+; NOOUTLINE-NEXT:    ret
+;
+; OUTLINE-LABEL: cmpxchg_dead:
+; OUTLINE:       // %bb.0:
+; OUTLINE-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
+; OUTLINE-NEXT:    .cfi_def_cfa_offset 16
+; OUTLINE-NEXT:    .cfi_offset w30, -16
+; OUTLINE-NEXT:    mov x1, x3
+; OUTLINE-NEXT:    mov x8, x0
+; OUTLINE-NEXT:    mov x0, x2
+; OUTLINE-NEXT:    mov x2, x4
+; OUTLINE-NEXT:    mov x3, x5
+; OUTLINE-NEXT:    mov x4, x8
+; OUTLINE-NEXT:    bl __aarch64_cas16_relax
+; OUTLINE-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
+; OUTLINE-NEXT:    ret
+;
+; LSE-LABEL: cmpxchg_dead:
+; LSE:       // %bb.0:
+; LSE-NEXT:    // kill: def $x5 killed $x5 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    // kill: def $x4 killed $x4 killed $x4_x5 def $x4_x5
+; LSE-NEXT:    // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; LSE-NEXT:    casp x2, x3, x4, x5, [x0]
+; LSE-NEXT:    ret
+  cmpxchg i128* %ptr, i128 %desired, i128 %new monotonic monotonic
+  ret void
+}


        

