[llvm] c8b3d7d - [AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 15 17:05:31 PDT 2021
Author: Jessica Paquette
Date: 2021-09-15T17:05:09-07:00
New Revision: c8b3d7d6d6de37af68b2f379d0e37304f78e115f
URL: https://github.com/llvm/llvm-project/commit/c8b3d7d6d6de37af68b2f379d0e37304f78e115f
DIFF: https://github.com/llvm/llvm-project/commit/c8b3d7d6d6de37af68b2f379d0e37304f78e115f.diff
LOG: [AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations
The default register bank selection code for G_LOAD assumes that we ought to
use an FPR when the load is cast to a float/double.
For atomics, this isn't true; we should always use GPRs.
Without this patch, we crash on the following example:
https://godbolt.org/z/MThjas441
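(For reference, the failing pattern is an atomic load whose only use is a cast
to a floating-point type. A minimal sketch of that shape, mirroring the
bitcast_to_double test added below rather than the exact godbolt input:)

  define double @atomic_load_as_double(i64* %ptr) {
    ; The atomic load's result feeds an FP instruction, but it must still be
    ; assigned a GPR; the old default mapped fp-feeding loads to an FPR.
    %load = load atomic i64, i64* %ptr seq_cst, align 8
    %cast = bitcast i64 %load to double
    ret double %cast
  }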
While we're here, also make the code a little more stylistically consistent
and add tests for some other cast combinations (bitcasts to fp types,
inttoptr, and a direct pointer load).
Differential Revision: https://reviews.llvm.org/D109771
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index ffe64c56d419d..5af0b2d9e202b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -751,24 +752,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] != PMI_FirstGPR)
+ if (OpRegBankIdx[0] != PMI_FirstGPR) {
Cost = 2;
- else
- // Check if that load feeds fp instructions.
- // In that case, we want the default mapping to be on FPR
- // instead of blind map every scalar to GPR.
- for (const MachineInstr &UseMI :
- MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
- // If we have at least one direct use in a FP instruction,
- // assume this was a floating point load in the IR.
- // If it was not, we would have had a bitcast before
- // reaching that instruction.
- // Int->FP conversion operations are also captured in onlyDefinesFP().
- if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
- }
- }
+ break;
+ }
+
+ if (cast<GLoad>(MI).isAtomic()) {
+ // Atomics always use GPR destinations. Don't refine any further.
+ OpRegBankIdx[0] = PMI_FirstGPR;
+ break;
+ }
+
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blind map every scalar to GPR.
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ //
+ // Int->FP conversion operations are also captured in
+ // onlyDefinesFP().
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ onlyDefinesFP(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx[0] = PMI_FirstFPR;
break;
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 0308874c74b25..633c6e8c20325 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2825,4 +2825,115 @@ define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) {
ret { i16, i1 } %res
}
+define internal double @bitcast_to_double(i64* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_double:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar x8, [x0]
+; CHECK-NOLSE-NEXT: fmov d0, x8
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_double:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar x8, [x0]
+; CHECK-LSE-O1-NEXT: fmov d0, x8
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_double:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar x8, [x0]
+; CHECK-LSE-O0-NEXT: fmov d0, x8
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i64, i64* %ptr seq_cst, align 8
+ %bitcast = bitcast i64 %load to double
+ ret double %bitcast
+}
+
+define internal float @bitcast_to_float(i32* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_float:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar w8, [x0]
+; CHECK-NOLSE-NEXT: fmov s0, w8
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_float:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar w8, [x0]
+; CHECK-LSE-O1-NEXT: fmov s0, w8
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_float:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar w8, [x0]
+; CHECK-LSE-O0-NEXT: fmov s0, w8
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i32, i32* %ptr seq_cst, align 8
+ %bitcast = bitcast i32 %load to float
+ ret float %bitcast
+}
+
+define internal half @bitcast_to_half(i16* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_half:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldarh w8, [x0]
+; CHECK-NOLSE-NEXT: fmov s0, w8
+; CHECK-NOLSE-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_half:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldarh w8, [x0]
+; CHECK-LSE-O1-NEXT: fmov s0, w8
+; CHECK-LSE-O1-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_half:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldarh w8, [x0]
+; CHECK-LSE-O0-NEXT: fmov s0, w8
+; CHECK-LSE-O0-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i16, i16* %ptr seq_cst, align 8
+ %bitcast = bitcast i16 %load to half
+ ret half %bitcast
+}
+
+define internal i64* @inttoptr(i64* %ptr) {
+; CHECK-NOLSE-LABEL: inttoptr:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar x0, [x0]
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: inttoptr:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: inttoptr:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i64, i64* %ptr seq_cst, align 8
+ %bitcast = inttoptr i64 %load to i64*
+ ret i64* %bitcast
+}
+
+define internal i64* @load_ptr(i64** %ptr) {
+; CHECK-NOLSE-LABEL: load_ptr:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar x0, [x0]
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: load_ptr:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: load_ptr:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i64*, i64** %ptr seq_cst, align 8
+ ret i64* %load
+}
+
attributes #0 = { nounwind }