[llvm] c8b3d7d - [AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 15 17:05:31 PDT 2021
Author: Jessica Paquette
Date: 2021-09-15T17:05:09-07:00
New Revision: c8b3d7d6d6de37af68b2f379d0e37304f78e115f
URL: https://github.com/llvm/llvm-project/commit/c8b3d7d6d6de37af68b2f379d0e37304f78e115f
DIFF: https://github.com/llvm/llvm-project/commit/c8b3d7d6d6de37af68b2f379d0e37304f78e115f.diff
LOG: [AArch64][GlobalISel] Ensure atomic loads always get assigned GPR destinations
The default register bank selection code for G_LOAD assumes that we ought to
use an FPR when the load is cast to a float/double.
For atomics, this isn't true; we should always use GPRs.
Without this patch, we crash on the following example:
https://godbolt.org/z/MThjas441
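(For reference, the failing pattern is an atomic load whose only use is a cast
to a floating-point type. A minimal sketch of that shape, mirroring the
bitcast_to_double test added below rather than the exact godbolt input:)

  define double @atomic_load_as_double(i64* %ptr) {
    ; The atomic load's result feeds an FP instruction, but it must still be
    ; assigned a GPR; the old default mapped fp-feeding loads to an FPR.
    %load = load atomic i64, i64* %ptr seq_cst, align 8
    %cast = bitcast i64 %load to double
    ret double %cast
  }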
While we're here, also make the code a little more stylistically consistent
and add tests for some other cast combinations (bitcasts to fp types,
inttoptr, and a direct pointer load).
Differential Revision: https://reviews.llvm.org/D109771
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
index ffe64c56d419d..5af0b2d9e202b 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64RegisterBankInfo.cpp
@@ -15,6 +15,7 @@
#include "AArch64InstrInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
@@ -751,24 +752,33 @@ AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
// for the greedy mode the cost of the cross bank copy will
// offset this number.
// FIXME: Should be derived from the scheduling model.
- if (OpRegBankIdx[0] != PMI_FirstGPR)
+ if (OpRegBankIdx[0] != PMI_FirstGPR) {
Cost = 2;
- else
- // Check if that load feeds fp instructions.
- // In that case, we want the default mapping to be on FPR
- // instead of blind map every scalar to GPR.
- for (const MachineInstr &UseMI :
- MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
- // If we have at least one direct use in a FP instruction,
- // assume this was a floating point load in the IR.
- // If it was not, we would have had a bitcast before
- // reaching that instruction.
- // Int->FP conversion operations are also captured in onlyDefinesFP().
- if (onlyUsesFP(UseMI, MRI, TRI) || onlyDefinesFP(UseMI, MRI, TRI)) {
- OpRegBankIdx[0] = PMI_FirstFPR;
- break;
- }
- }
+ break;
+ }
+
+ if (cast<GLoad>(MI).isAtomic()) {
+ // Atomics always use GPR destinations. Don't refine any further.
+ OpRegBankIdx[0] = PMI_FirstGPR;
+ break;
+ }
+
+ // Check if that load feeds fp instructions.
+ // In that case, we want the default mapping to be on FPR
+ // instead of blind map every scalar to GPR.
+ if (any_of(MRI.use_nodbg_instructions(MI.getOperand(0).getReg()),
+ [&](const MachineInstr &UseMI) {
+ // If we have at least one direct use in a FP instruction,
+ // assume this was a floating point load in the IR. If it was
+ // not, we would have had a bitcast before reaching that
+ // instruction.
+ //
+ // Int->FP conversion operations are also captured in
+ // onlyDefinesFP().
+ return onlyUsesFP(UseMI, MRI, TRI) ||
+ onlyDefinesFP(UseMI, MRI, TRI);
+ }))
+ OpRegBankIdx[0] = PMI_FirstFPR;
break;
case TargetOpcode::G_STORE:
// Check if that store is fed by fp instructions.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
index 0308874c74b25..633c6e8c20325 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -2825,4 +2825,115 @@ define { i16, i1 } @cmpxchg_i16(i16* %ptr, i16 %desired, i16 %new) {
ret { i16, i1 } %res
}
+define internal double @bitcast_to_double(i64* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_double:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar x8, [x0]
+; CHECK-NOLSE-NEXT: fmov d0, x8
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_double:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar x8, [x0]
+; CHECK-LSE-O1-NEXT: fmov d0, x8
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_double:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar x8, [x0]
+; CHECK-LSE-O0-NEXT: fmov d0, x8
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i64, i64* %ptr seq_cst, align 8
+ %bitcast = bitcast i64 %load to double
+ ret double %bitcast
+}
+
+define internal float @bitcast_to_float(i32* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_float:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar w8, [x0]
+; CHECK-NOLSE-NEXT: fmov s0, w8
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_float:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar w8, [x0]
+; CHECK-LSE-O1-NEXT: fmov s0, w8
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_float:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar w8, [x0]
+; CHECK-LSE-O0-NEXT: fmov s0, w8
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i32, i32* %ptr seq_cst, align 8
+ %bitcast = bitcast i32 %load to float
+ ret float %bitcast
+}
+
+define internal half @bitcast_to_half(i16* %ptr) {
+; CHECK-NOLSE-LABEL: bitcast_to_half:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldarh w8, [x0]
+; CHECK-NOLSE-NEXT: fmov s0, w8
+; CHECK-NOLSE-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: bitcast_to_half:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldarh w8, [x0]
+; CHECK-LSE-O1-NEXT: fmov s0, w8
+; CHECK-LSE-O1-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: bitcast_to_half:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldarh w8, [x0]
+; CHECK-LSE-O0-NEXT: fmov s0, w8
+; CHECK-LSE-O0-NEXT: ; kill: def $h0 killed $h0 killed $s0
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i16, i16* %ptr seq_cst, align 8
+ %bitcast = bitcast i16 %load to half
+ ret half %bitcast
+}
+
+define internal i64* @inttoptr(i64* %ptr) {
+; CHECK-NOLSE-LABEL: inttoptr:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar x0, [x0]
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: inttoptr:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: inttoptr:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i64, i64* %ptr seq_cst, align 8
+ %bitcast = inttoptr i64 %load to i64*
+ ret i64* %bitcast
+}
+
+define internal i64* @load_ptr(i64** %ptr) {
+; CHECK-NOLSE-LABEL: load_ptr:
+; CHECK-NOLSE: ; %bb.0:
+; CHECK-NOLSE-NEXT: ldar x0, [x0]
+; CHECK-NOLSE-NEXT: ret
+;
+; CHECK-LSE-O1-LABEL: load_ptr:
+; CHECK-LSE-O1: ; %bb.0:
+; CHECK-LSE-O1-NEXT: ldar x0, [x0]
+; CHECK-LSE-O1-NEXT: ret
+;
+; CHECK-LSE-O0-LABEL: load_ptr:
+; CHECK-LSE-O0: ; %bb.0:
+; CHECK-LSE-O0-NEXT: ldar x0, [x0]
+; CHECK-LSE-O0-NEXT: ret
+ %load = load atomic i64*, i64** %ptr seq_cst, align 8
+ ret i64* %load
+}
+
attributes #0 = { nounwind }