[llvm] 45417b7 - [AArch64][GlobalISel] Properly implement widening for TB(N)Z
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 09:25:07 PST 2020
Author: Jessica Paquette
Date: 2020-02-12T09:24:58-08:00
New Revision: 45417b7aa7fcdd2aa75c1c809a3c7fb3292134be
URL: https://github.com/llvm/llvm-project/commit/45417b7aa7fcdd2aa75c1c809a3c7fb3292134be
DIFF: https://github.com/llvm/llvm-project/commit/45417b7aa7fcdd2aa75c1c809a3c7fb3292134be.diff
LOG: [AArch64][GlobalISel] Properly implement widening for TB(N)Z
When we have to widen to a 64-bit register, we have to emit a SUBREG_TO_REG.
Add a general-purpose widening helper which emits the correct SUBREG_TO_REG
instruction based on the desired size, and add a test case.
Also remove some asserts in `emitTestBit` which are technically incorrect:
- p0 doesn't count as a scalar type, so we need to check `!Ty.isVector()`
instead
- Whenever we have an s1, the Size/Bit checks are too conservative, so just
remove them
Replace these asserts with less conservative ones where applicable.
Differential Revision: https://reviews.llvm.org/D74427
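As a rough sketch of the decision this patch implements (illustrative only; the
enum and function names below are hypothetical and not part of the patch): the
bit index picks TB(N)ZW vs. TB(N)ZX, and the size of the incoming GPR then
decides whether we widen with SUBREG_TO_REG, narrow with a subregister copy, or
use the register as-is.

#include <cassert>
#include <cstdint>

// Hypothetical names for illustration; the real logic lives in
// AArch64InstructionSelector::emitTestBit / widenGPRBankRegIfNeeded.
enum class GPRFixup { None, WidenWithSubregToReg, NarrowWithSubregCopy };

GPRFixup classifyTestBitReg(uint64_t Bit, unsigned RegSizeInBits) {
  assert(Bit < 64 && "Bit is too large!");
  // TB(N)ZW can test bits 0-31; bits 32-63 need the 64-bit TB(N)ZX form.
  unsigned NecessarySize = Bit < 32 ? 32 : 64;
  if (RegSizeInBits < NecessarySize)
    return GPRFixup::WidenWithSubregToReg;  // e.g. s32 feeding TBZX on bit 33
  if (RegSizeInBits > NecessarySize)
    return GPRFixup::NarrowWithSubregCopy;  // e.g. s64 feeding TBNZW on bit 0
  return GPRFixup::None;
}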
Added:
llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 83f1004c258c..25aba784b197 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -273,6 +273,8 @@ class AArch64InstructionSelector : public InstructionSelector {
/// new copy.
Register narrowExtendRegIfNeeded(Register ExtReg,
MachineIRBuilder &MIB) const;
+ Register widenGPRBankRegIfNeeded(Register Reg, unsigned Size,
+ MachineIRBuilder &MIB) const;
ComplexRendererFns selectArithExtendedRegister(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI,
@@ -1124,26 +1126,25 @@ static Register getTestBitReg(Register Reg, uint64_t &Bit, bool &Invert,
MachineInstr *AArch64InstructionSelector::emitTestBit(
Register TestReg, uint64_t Bit, bool IsNegative, MachineBasicBlock *DstMBB,
MachineIRBuilder &MIB) const {
- MachineRegisterInfo &MRI = *MIB.getMRI();
-#ifndef NDEBUG
+ assert(TestReg.isValid());
assert(ProduceNonFlagSettingCondBr &&
"Cannot emit TB(N)Z with speculation tracking!");
- assert(TestReg.isValid());
- LLT Ty = MRI.getType(TestReg);
- unsigned Size = Ty.getSizeInBits();
- assert(Bit < Size &&
- "Bit to test must be smaler than the size of a test register!");
- assert(Ty.isScalar() && "Expected a scalar!");
- assert(Size >= 32 && "Expected at least a 32-bit register!");
-#endif
+ MachineRegisterInfo &MRI = *MIB.getMRI();
// Attempt to optimize the test bit by walking over instructions.
TestReg = getTestBitReg(TestReg, Bit, IsNegative, MRI);
- bool UseWReg = Bit < 32;
+ LLT Ty = MRI.getType(TestReg);
+ unsigned Size = Ty.getSizeInBits();
+ assert(!Ty.isVector() && "Expected a scalar!");
+ assert(Bit < 64 && "Bit is too large!");
// When the test register is a 64-bit register, we have to narrow to make
// TBNZW work.
- if (UseWReg)
+ bool UseWReg = Bit < 32;
+ unsigned NecessarySize = UseWReg ? 32 : 64;
+ if (Size < NecessarySize)
+ TestReg = widenGPRBankRegIfNeeded(TestReg, NecessarySize, MIB);
+ else if (Size > NecessarySize)
TestReg = narrowExtendRegIfNeeded(TestReg, MIB);
static const unsigned OpcTable[2][2] = {{AArch64::TBZX, AArch64::TBNZX},
@@ -5154,6 +5155,52 @@ Register AArch64InstructionSelector::narrowExtendRegIfNeeded(
return Copy.getReg(0);
}
+Register AArch64InstructionSelector::widenGPRBankRegIfNeeded(
+ Register Reg, unsigned WideSize, MachineIRBuilder &MIB) const {
+ assert(WideSize >= 8 && "WideSize is smaller than all possible registers?");
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NarrowSize = MRI.getType(Reg).getSizeInBits();
+ assert(WideSize >= NarrowSize &&
+ "WideSize cannot be smaller than NarrowSize!");
+
+ // If the sizes match, just return the register.
+ //
+ // If NarrowSize is an s1, then we can select it to any size, so we'll treat
+ // it as a don't care.
+ if (NarrowSize == WideSize || NarrowSize == 1)
+ return Reg;
+
+ // Now check the register classes.
+ const RegisterBank *RB = RBI.getRegBank(Reg, MRI, TRI);
+ const TargetRegisterClass *OrigRC = getMinClassForRegBank(*RB, NarrowSize);
+ const TargetRegisterClass *WideRC = getMinClassForRegBank(*RB, WideSize);
+ assert(OrigRC && "Could not determine narrow RC?");
+ assert(WideRC && "Could not determine wide RC?");
+
+ // If the sizes differ, but the register classes are the same, there is no
+ // need to insert a SUBREG_TO_REG.
+ //
+ // For example, an s8 that's supposed to be a GPR will be selected to either
+ // a GPR32 or a GPR64 register. Note that this assumes that the s8 will
+ // always end up on a GPR32.
+ if (OrigRC == WideRC)
+ return Reg;
+
+ // We have two different register classes. Insert a SUBREG_TO_REG.
+ unsigned SubReg = 0;
+ getSubRegForClass(OrigRC, TRI, SubReg);
+ assert(SubReg && "Couldn't determine subregister?");
+
+ // Build the SUBREG_TO_REG and return the new, widened register.
+ auto SubRegToReg =
+ MIB.buildInstr(AArch64::SUBREG_TO_REG, {WideRC}, {})
+ .addImm(0)
+ .addUse(Reg)
+ .addImm(SubReg);
+ constrainSelectedInstRegOperands(*SubRegToReg, TII, TRI, RBI);
+ return SubRegToReg.getReg(0);
+}
+
/// Select an "extended register" operand. This operand folds in an extend
/// followed by an optional left shift.
InstructionSelector::ComplexRendererFns
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
index 2d4fd65ab058..af8e03be913b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-fold-ext-tbz-tbnz.mir
@@ -78,9 +78,8 @@ body: |
; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
; CHECK: liveins: $h0
; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:fpr32 = SUBREG_TO_REG 0, $h0, %subreg.hsub
- ; CHECK: %copy:gpr32all = COPY [[SUBREG_TO_REG]]
- ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY %copy
- ; CHECK: TBNZW [[COPY]], 3, %bb.1
+ ; CHECK: %copy:gpr32 = COPY [[SUBREG_TO_REG]]
+ ; CHECK: TBNZW %copy, 3, %bb.1
; CHECK: B %bb.0
; CHECK: bb.1:
; CHECK: RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
new file mode 100644
index 000000000000..22963c50a2eb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/widen-narrow-tbz-tbnz.mir
@@ -0,0 +1,193 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test widening and narrowing on test bit operations using subregister copies
+# or SUBREG_TO_REG.
+--- |
+ @glob = external unnamed_addr global i1, align 4
+ define void @s1_no_copy() { ret void }
+ define void @s16_no_copy() { ret void }
+ define void @p0_no_copy() { ret void }
+ define void @widen_s32_to_s64() { ret void }
+ define void @widen_s16_to_s64() { ret void }
+ define void @narrow_s64_to_s32() { ret void }
+
+...
+---
+name: s1_no_copy
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: s1_no_copy
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %narrow:gpr32 = IMPLICIT_DEF
+ ; CHECK: TBNZW %narrow, 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ %narrow:gpr(s1) = G_IMPLICIT_DEF
+
+ ; There should be no copy here, because the s1 can be selected to a GPR32.
+ G_BRCOND %narrow(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: s16_no_copy
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: s16_no_copy
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %narrow:gpr32 = IMPLICIT_DEF
+ ; CHECK: TBNZW %narrow, 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ %narrow:gpr(s16) = G_IMPLICIT_DEF
+ %trunc:gpr(s1) = G_TRUNC %narrow(s16)
+
+ ; Look through the G_TRUNC to get the G_IMPLICIT_DEF. We don't need a
+ ; SUBREG_TO_REG here, because the s16 will end up on a 32-bit register.
+ G_BRCOND %trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: p0_no_copy
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: p0_no_copy
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %glob:gpr64common = MOVaddr target-flags(aarch64-page) @glob, target-flags(aarch64-pageoff, aarch64-nc) @glob
+ ; CHECK: %load:gpr32 = LDRBBui %glob, 0 :: (dereferenceable load 1 from @glob, align 4)
+ ; CHECK: TBNZW %load, 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ %glob:gpr(p0) = G_GLOBAL_VALUE @glob
+ %load:gpr(s8) = G_LOAD %glob(p0) :: (dereferenceable load 1 from @glob, align 4)
+ %trunc:gpr(s1) = G_TRUNC %load(s8)
+
+ ; Look through G_TRUNC to get the load. The load is into a s8, which will
+ ; be selected to a GPR32, so we don't need a copy.
+ G_BRCOND %trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: widen_s32_to_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: widen_s32_to_s64
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $w0
+ ; CHECK: %reg:gpr32all = COPY $w0
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+ ; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $w0
+ %reg:gpr(s32) = COPY $w0
+ %zext:gpr(s64) = G_ZEXT %reg(s32)
+ %bit:gpr(s64) = G_CONSTANT i64 8589934592
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %and:gpr(s64) = G_AND %zext, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
+
+ ; We should widen using a SUBREG_TO_REG here, because we need a TBZX to get
+ ; bit 33. The subregister should be sub_32.
+ %trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: widen_s16_to_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: widen_s16_to_s64
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %reg:gpr32 = IMPLICIT_DEF
+ ; CHECK: [[SUBREG_TO_REG:%[0-9]+]]:gpr64 = SUBREG_TO_REG 0, %reg, %subreg.sub_32
+ ; CHECK: TBZX [[SUBREG_TO_REG]], 33, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ %reg:gpr(s16) = G_IMPLICIT_DEF
+ %zext:gpr(s64) = G_ZEXT %reg(s16)
+ %bit:gpr(s64) = G_CONSTANT i64 8589934592
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %and:gpr(s64) = G_AND %zext, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(eq), %and(s64), %zero
+
+ ; We should widen using a SUBREG_TO_REG here, because we need a TBZX to get
+ ; bit 33. The subregister should be sub_32, because s16 will end up on a
+ ; GPR32.
+ %trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+...
+---
+name: narrow_s64_to_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: narrow_s64_to_s32
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: %wide:gpr64 = COPY $x0
+ ; CHECK: %trunc:gpr32 = COPY %wide.sub_32
+ ; CHECK: TBNZW %trunc, 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %wide:gpr(s64) = COPY $x0
+
+ ; We should narrow using a subregister copy here.
+ %trunc:gpr(s1) = G_TRUNC %wide(s64)
+ G_BRCOND %trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR