[llvm] dba29f7 - [AArch64][GlobalISel] Fold G_AND into G_BRCOND
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 14:02:17 PST 2020
Author: Jessica Paquette
Date: 2020-01-28T14:00:31-08:00
New Revision: dba29f7c3b36443c7287ba5a31d166f8edb73544
URL: https://github.com/llvm/llvm-project/commit/dba29f7c3b36443c7287ba5a31d166f8edb73544
DIFF: https://github.com/llvm/llvm-project/commit/dba29f7c3b36443c7287ba5a31d166f8edb73544.diff
LOG: [AArch64][GlobalISel] Fold G_AND into G_BRCOND
When the G_BRCOND is fed by an eq or ne G_ICMP, it may be possible to fold a
G_AND into the branch by producing a tbnz/tbz instead.
This happens when:
1. We have an eq/ne G_ICMP feeding into the G_BRCOND
2. The G_ICMP is a comparison against 0
3. One of the operands of the G_AND is a power-of-2 constant
This is very similar to the code in AArch64TargetLowering::LowerBR_CC.
Add opt-and-tbnz-tbz.mir to test this.
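As an aside, here is a minimal standalone C++ sketch (no LLVM dependencies;
isPow2 and log2u64 are invented stand-ins for llvm::isPowerOf2_64 and
llvm::Log2_64) of the equivalence the fold relies on: ANDing with a
power-of-2 mask and comparing the result against 0 is the same as testing a
single bit, which is exactly what tbnz/tbz encode.

#include <cassert>
#include <cstdint>

// Invented stand-ins for llvm::isPowerOf2_64 and llvm::Log2_64.
static bool isPow2(uint64_t V) { return V && (V & (V - 1)) == 0; }
static unsigned log2u64(uint64_t V) { return 63 - __builtin_clzll(V); }

int main() {
  const uint64_t Mask = 8; // the G_AND's power-of-2 constant operand
  assert(isPow2(Mask));
  const unsigned Bit = log2u64(Mask); // 3: the immediate a tbnz would carry
  for (uint64_t X = 0; X < 64; ++X)
    // (X & Mask) != 0  <=>  bit `Bit` of X is set, i.e. the tbnz condition.
    assert(((X & Mask) != 0) == (((X >> Bit) & 1) != 0));
  return 0;
}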
Differential Revision: https://reviews.llvm.org/D73573
Added:
llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index a943d25d895d..bfa329b1fe94 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -89,6 +89,11 @@ class AArch64InstructionSelector : public InstructionSelector {
bool selectVaStartDarwin(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
+ bool tryOptAndIntoCompareBranch(MachineInstr *LHS,
+ int64_t CmpConstant,
+ const CmpInst::Predicate &Pred,
+ MachineBasicBlock *DstMBB,
+ MachineIRBuilder &MIB) const;
bool selectCompareBranch(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
@@ -983,6 +988,64 @@ static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
}
}
+bool AArch64InstructionSelector::tryOptAndIntoCompareBranch(
+ MachineInstr *AndInst, int64_t CmpConstant, const CmpInst::Predicate &Pred,
+ MachineBasicBlock *DstMBB, MachineIRBuilder &MIB) const {
+ // Given something like this:
+ //
+ // %x = ...Something...
+ // %one = G_CONSTANT i64 1
+ // %zero = G_CONSTANT i64 0
+ // %and = G_AND %x, %one
+ // %cmp = G_ICMP intpred(ne), %and, %zero
+ // %cmp_trunc = G_TRUNC %cmp
+ // G_BRCOND %cmp_trunc, %bb.3
+ //
+ // We want to try to fold the AND into the G_BRCOND and produce either a
+ // TBNZ (when we have intpred(ne)) or a TBZ (when we have intpred(eq)).
+ //
+ // In this case, we'd get
+ //
+ // TBNZ %x, 0, %bb.3
+ //
+ if (!AndInst || AndInst->getOpcode() != TargetOpcode::G_AND)
+ return false;
+
+ // Need to be comparing against 0 to fold.
+ if (CmpConstant != 0)
+ return false;
+
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned Opc = 0;
+ Register TestReg = AndInst->getOperand(1).getReg();
+ unsigned TestSize = MRI.getType(TestReg).getSizeInBits();
+
+ // Only support EQ and NE. If we have LT, then it *is* possible to fold, but
+ // we don't want to do this. When we have an AND and LT, we need a TST/ANDS,
+ // so folding would be redundant.
+ if (Pred == CmpInst::Predicate::ICMP_EQ)
+ Opc = TestSize == 32 ? AArch64::TBZW : AArch64::TBZX;
+ else if (Pred == CmpInst::Predicate::ICMP_NE)
+ Opc = TestSize == 32 ? AArch64::TBNZW : AArch64::TBNZX;
+ else
+ return false;
+
+ // Check if the AND has a constant on its RHS which we can use as a mask.
+ // If it's a power of 2, then it's the same as checking a specific bit.
+ // (e.g., ANDing with 8 == ANDing with 000...100 == testing if bit 3 is set)
+ auto MaybeBit =
+ getConstantVRegValWithLookThrough(AndInst->getOperand(2).getReg(), MRI);
+ if (!MaybeBit || !isPowerOf2_64(MaybeBit->Value))
+ return false;
+ uint64_t Bit = Log2_64(static_cast<uint64_t>(MaybeBit->Value));
+
+ // Construct the branch.
+ auto BranchMI =
+ MIB.buildInstr(Opc).addReg(TestReg).addImm(Bit).addMBB(DstMBB);
+ constrainSelectedInstRegOperands(*BranchMI, TII, TRI, RBI);
+ return true;
+}
+
bool AArch64InstructionSelector::selectCompareBranch(
MachineInstr &I, MachineFunction &MF, MachineRegisterInfo &MRI) const {
@@ -1000,9 +1063,9 @@ bool AArch64InstructionSelector::selectCompareBranch(
if (!VRegAndVal)
std::swap(RHS, LHS);
+ MachineIRBuilder MIB(I);
VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
if (!VRegAndVal || VRegAndVal->Value != 0) {
- MachineIRBuilder MIB(I);
// If we can't select a CBZ then emit a cmp + Bcc.
if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
CCMI->getOperand(1), MIB))
@@ -1014,11 +1077,18 @@ bool AArch64InstructionSelector::selectCompareBranch(
return true;
}
+ // Try to fold things into the branch.
+ const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+ MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+ if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
+ MIB)) {
+ I.eraseFromParent();
+ return true;
+ }
+
const RegisterBank &RB = *RBI.getRegBank(LHS, MRI, TRI);
if (RB.getID() != AArch64::GPRRegBankID)
return false;
-
- const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
if (Pred != CmpInst::ICMP_NE && Pred != CmpInst::ICMP_EQ)
return false;
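To summarize the new selection logic outside of the selector plumbing, here
is a minimal standalone C++ sketch (no LLVM headers; Pred, TestBitBranch,
and foldAndIntoBranch are invented illustration names, not the selector's
API): EQ selects TBZ, NE selects TBNZ, the test register's width picks the
W/X form, and the branch's bit index is log2 of the power-of-2 mask.

#include <cstdint>
#include <cstdio>
#include <optional>

enum class Pred { EQ, NE, Other };

struct TestBitBranch {
  const char *Opc; // "TBZW", "TBZX", "TBNZW", or "TBNZX"
  uint64_t Bit;    // bit index tested by the branch
};

static std::optional<TestBitBranch>
foldAndIntoBranch(Pred P, unsigned TestSizeInBits, uint64_t Mask,
                  int64_t CmpConstant) {
  if (CmpConstant != 0) // must be a comparison against 0
    return std::nullopt;
  if (Mask == 0 || (Mask & (Mask - 1)) != 0) // mask must be a power of 2
    return std::nullopt;
  const char *Opc = nullptr;
  if (P == Pred::EQ)
    Opc = TestSizeInBits == 32 ? "TBZW" : "TBZX";
  else if (P == Pred::NE)
    Opc = TestSizeInBits == 32 ? "TBNZW" : "TBNZX";
  else // e.g. LT would need a TST/ANDS anyway, so folding is redundant
    return std::nullopt;
  uint64_t Bit = 63 - __builtin_clzll(Mask); // Log2_64 of the mask
  return TestBitBranch{Opc, Bit};
}

int main() {
  // (x & 8) != 0 on an s64 value -> TBNZX testing bit 3.
  if (auto TB = foldAndIntoBranch(Pred::NE, 64, 8, 0))
    std::printf("%s, bit %llu\n", TB->Opc, (unsigned long long)TB->Bit);
  return 0;
}

Under those assumptions, foldAndIntoBranch(Pred::NE, 64, 8, 0) yields TBNZX
with bit index 3, which is exactly what the tbnz_and_s64 test added below
checks for.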
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
new file mode 100644
index 000000000000..525bbe588142
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-and-tbnz-tbz.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Verify that we can fold G_AND into G_BRCOND when all of the following hold:
+# 1. We have an eq/ne G_ICMP feeding into the G_BRCOND
+# 2. The G_ICMP compares against 0
+# 3. One of the operands of the G_AND is a power of 2
+#
+# If all of these hold, we should produce a tbnz or a tbz.
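+#
+# For example, in tbnz_and_s64 below the G_AND mask is 8 (bit 3), so the
+# intpred(ne) compare against 0 folds to a single TBNZX of bit 3 that
+# branches to %bb.1, and the G_AND, G_ICMP, and G_TRUNC all disappear.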
+...
+---
+name: tbnz_and_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: tbnz_and_s64
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: TBNZX [[COPY]], 3, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 8 ; Power of 2 => TBNZ tests bit 3
+ %3:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: tbz_and_s64
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: tbz_and_s64
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $x0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: TBZX [[COPY]], 4, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %0:gpr(s64) = COPY $x0
%1:gpr(s64) = G_CONSTANT i64 16 ; Power of 2 => TBZ tests bit 4
+ %3:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(eq), %2(s64), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: tbnz_and_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: tbnz_and_s32
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: TBNZW [[COPY]], 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $w0
+ %0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBNZ tests bit 0
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(ne), %2(s32), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: tbz_and_s32
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: tbz_and_s32
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: TBZW [[COPY]], 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $w0
+ %0:gpr(s32) = COPY $w0
%1:gpr(s32) = G_CONSTANT i32 1 ; Power of 2 => TBZ tests bit 0
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(eq), %2(s32), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_fold_and_lt
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_and_lt
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+ ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $w0
+ %0:gpr(s32) = COPY $w0
+ %1:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(slt), %2(s32), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_fold_and_gt
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_and_gt
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: liveins: $w0
+ ; CHECK: [[COPY:%[0-9]+]]:gpr32 = COPY $w0
+ ; CHECK: $wzr = ANDSWri [[COPY]], 0, implicit-def $nzcv
+ ; CHECK: [[CSINCWr:%[0-9]+]]:gpr32 = CSINCWr $wzr, $wzr, 13, implicit $nzcv
+ ; CHECK: TBNZW [[CSINCWr]], 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $w0
+ %0:gpr(s32) = COPY $w0
+ %1:gpr(s32) = G_CONSTANT i32 1
+ %3:gpr(s32) = G_CONSTANT i32 0
+ %2:gpr(s32) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(sgt), %2(s32), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_fold_and_not_power_of_2
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_and_not_power_of_2
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[ANDXri:%[0-9]+]]:gpr64common = ANDXri [[COPY]], 4098
+ ; CHECK: CBNZX [[ANDXri]], %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 7
+ %3:gpr(s64) = G_CONSTANT i64 0
+ %2:gpr(s64) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_fold_cmp_not_0
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_cmp_not_0
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[ANDXri:%[0-9]+]]:gpr64sp = ANDXri [[COPY]], 8064
+ ; CHECK: $xzr = SUBSXri [[ANDXri]], 4, 0, implicit-def $nzcv
+ ; CHECK: Bcc 1, %bb.1, implicit $nzcv
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %0:gpr(s64) = COPY $x0
+ %1:gpr(s64) = G_CONSTANT i64 4
+ %3:gpr(s64) = G_CONSTANT i64 4
+ %2:gpr(s64) = G_AND %0, %1
+ %5:gpr(s32) = G_ICMP intpred(ne), %2(s64), %3
+ %4:gpr(s1) = G_TRUNC %5(s32)
+ G_BRCOND %4(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR