[llvm] af4dc63 - [AArch64][GlobalISel] Fix atomic truncating stores from generating invalid copies.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 9 20:47:55 PST 2021
Author: Amara Emerson
Date: 2021-11-09T20:47:50-08:00
New Revision: af4dc633f86f62ae587b25ed2a17b6bf5fe7a3ac
URL: https://github.com/llvm/llvm-project/commit/af4dc633f86f62ae587b25ed2a17b6bf5fe7a3ac
DIFF: https://github.com/llvm/llvm-project/commit/af4dc633f86f62ae587b25ed2a17b6bf5fe7a3ac.diff
LOG: [AArch64][GlobalISel] Fix atomic truncating stores from generating invalid copies.
If the source register is a 64-bit vreg, then we need to emit a subreg copy to a
32-bit GPR before we select sub-64-bit store variants like STLRW.
Added:
llvm/test/CodeGen/AArch64/GlobalISel/select-truncstore-atomic.mir
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 381f50f62e9b8..580ba8815bcec 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2770,6 +2770,14 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
} else {
static unsigned Opcodes[] = {AArch64::STLRB, AArch64::STLRH,
AArch64::STLRW, AArch64::STLRX};
+ Register ValReg = LdSt.getReg(0);
+ if (MRI.getType(ValReg).getSizeInBits() == 64 && MemSizeInBits != 64) {
+ // Emit a subreg copy of 32 bits.
+ Register NewVal = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MIB.buildInstr(TargetOpcode::COPY, {NewVal}, {})
+ .addReg(I.getOperand(0).getReg(), 0, AArch64::sub_32);
+ I.getOperand(0).setReg(NewVal);
+ }
I.setDesc(TII.get(Opcodes[Log2_32(MemSizeInBytes)]));
}
constrainSelectedInstRegOperands(I, TII, TRI, RBI);
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-truncstore-atomic.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-truncstore-atomic.mir
new file mode 100644
index 0000000000000..8b5b33bf7d54e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-truncstore-atomic.mir
@@ -0,0 +1,150 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-- -run-pass=instruction-select -verify-machineinstrs -global-isel-abort=1 %s -o - | FileCheck %s
+
+---
+name: truncstore_atomic_32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+body: |
+ ; CHECK-LABEL: name: truncstore_atomic_32
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
+ ; CHECK-NEXT: STLRW [[COPY2]], [[COPY]] :: (store release (s32))
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.1:
+ liveins: $w1, $x0
+
+ %0:gpr(p0) = COPY $x0
+ %3:gpr(s32) = COPY $w1
+ %2:gpr(s8) = G_TRUNC %3(s32)
+ %4:gpr(s8) = G_ASSERT_ZEXT %2, 1
+ %1:gpr(s1) = G_TRUNC %4(s8)
+ G_BRCOND %1(s1), %bb.3
+ G_BR %bb.2
+
+ bb.2:
+ %8:gpr(s64) = G_CONSTANT i64 4
+ G_STORE %8(s64), %0(p0) :: (store release (s32))
+
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: truncstore_atomic_16
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+body: |
+ ; CHECK-LABEL: name: truncstore_atomic_16
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
+ ; CHECK-NEXT: STLRH [[COPY2]], [[COPY]] :: (store release (s16))
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.1:
+ liveins: $w1, $x0
+
+ %0:gpr(p0) = COPY $x0
+ %3:gpr(s32) = COPY $w1
+ %2:gpr(s8) = G_TRUNC %3(s32)
+ %4:gpr(s8) = G_ASSERT_ZEXT %2, 1
+ %1:gpr(s1) = G_TRUNC %4(s8)
+ G_BRCOND %1(s1), %bb.3
+ G_BR %bb.2
+
+ bb.2:
+ %8:gpr(s64) = G_CONSTANT i64 4
+ G_STORE %8(s64), %0(p0) :: (store release (s16))
+
+ bb.3:
+ RET_ReallyLR
+
+...
+---
+name: truncstore_atomic_8
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+liveins:
+ - { reg: '$x0' }
+ - { reg: '$w1' }
+body: |
+ ; CHECK-LABEL: name: truncstore_atomic_8
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.1(0x40000000)
+ ; CHECK-NEXT: liveins: $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr32 = COPY $w1
+ ; CHECK-NEXT: TBNZW [[COPY1]], 0, %bb.2
+ ; CHECK-NEXT: B %bb.1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[MOVi32imm:%[0-9]+]]:gpr32 = MOVi32imm 4
+ ; CHECK-NEXT: [[SUBREG_TO_REG:%[0-9]+]]:gpr64all = SUBREG_TO_REG 0, [[MOVi32imm]], %subreg.sub_32
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr32 = COPY [[SUBREG_TO_REG]].sub_32
+ ; CHECK-NEXT: STLRB [[COPY2]], [[COPY]] :: (store release (s8))
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: RET_ReallyLR
+ bb.1:
+ liveins: $w1, $x0
+
+ %0:gpr(p0) = COPY $x0
+ %3:gpr(s32) = COPY $w1
+ %2:gpr(s8) = G_TRUNC %3(s32)
+ %4:gpr(s8) = G_ASSERT_ZEXT %2, 1
+ %1:gpr(s1) = G_TRUNC %4(s8)
+ G_BRCOND %1(s1), %bb.3
+ G_BR %bb.2
+
+ bb.2:
+ %8:gpr(s64) = G_CONSTANT i64 4
+ G_STORE %8(s64), %0(p0) :: (store release (s8))
+
+ bb.3:
+ RET_ReallyLR
+
+...
More information about the llvm-commits mailing list