[llvm] 609a489 - [AArch64][GlobalISel] Reland SLT/SGT TBNZ optimization
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 7 11:16:20 PST 2020
Author: Jessica Paquette
Date: 2020-02-07T11:15:25-08:00
New Revision: 609a489e052e8ed2731aebbcc7828de1102e871d
URL: https://github.com/llvm/llvm-project/commit/609a489e052e8ed2731aebbcc7828de1102e871d
DIFF: https://github.com/llvm/llvm-project/commit/609a489e052e8ed2731aebbcc7828de1102e871d.diff
LOG: [AArch64][GlobalISel] Reland SLT/SGT TBNZ optimization
The issue in the previous commits was that we swapped the LHS and RHS while
looking for the constant. In SLT/SGT, the constant must be on the RHS, or the
optimization is invalid.
Move the swapping logic after the check for the SLT/SGT case and update tests.
Original commits:
d78cefb1601070cb028b61bbc1bd6f25a9c1837c
a3738414072900ace9cbbe209d0195a3443d1d54
Added:
llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 3d6d11f35217..83f1004c258c 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -1222,26 +1222,55 @@ bool AArch64InstructionSelector::selectCompareBranch(
Register LHS = CCMI->getOperand(2).getReg();
Register RHS = CCMI->getOperand(3).getReg();
auto VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
- if (!VRegAndVal)
+ MachineIRBuilder MIB(I);
+ const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
+ MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+
+ // When we can emit a TB(N)Z, prefer that.
+ //
+ // Handle non-commutative condition codes first.
+ // Note that we don't want to do this when we have a G_AND because it can
+ // become a tst. The tst will make the test bit in the TB(N)Z redundant.
+ if (VRegAndVal && LHSMI->getOpcode() != TargetOpcode::G_AND) {
+ int64_t C = VRegAndVal->Value;
+
+ // When we have a greater-than comparison, we can just test if the msb is
+ // zero.
+ if (C == -1 && Pred == CmpInst::ICMP_SGT) {
+ uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
+ emitTestBit(LHS, Bit, /*IsNegative = */ false, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+
+ // When we have a less than comparison, we can just test if the msb is not
+ // zero.
+ if (C == 0 && Pred == CmpInst::ICMP_SLT) {
+ uint64_t Bit = MRI.getType(LHS).getSizeInBits() - 1;
+ emitTestBit(LHS, Bit, /*IsNegative = */ true, DestMBB, MIB);
+ I.eraseFromParent();
+ return true;
+ }
+ }
+
+ if (!VRegAndVal) {
std::swap(RHS, LHS);
+ VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
+ LHSMI = getDefIgnoringCopies(LHS, MRI);
+ }
- MachineIRBuilder MIB(I);
- VRegAndVal = getConstantVRegValWithLookThrough(RHS, MRI);
if (!VRegAndVal || VRegAndVal->Value != 0) {
// If we can't select a CBZ then emit a cmp + Bcc.
if (!emitIntegerCompare(CCMI->getOperand(2), CCMI->getOperand(3),
CCMI->getOperand(1), MIB))
return false;
- const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
- (CmpInst::Predicate)CCMI->getOperand(1).getPredicate());
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(Pred);
MIB.buildInstr(AArch64::Bcc, {}, {}).addImm(CC).addMBB(DestMBB);
I.eraseFromParent();
return true;
}
- // Try to fold things into the branch.
- const auto Pred = (CmpInst::Predicate)CCMI->getOperand(1).getPredicate();
- MachineInstr *LHSMI = getDefIgnoringCopies(LHS, MRI);
+ // Try to emit a TB(N)Z for an eq or ne condition.
if (tryOptAndIntoCompareBranch(LHSMI, VRegAndVal->Value, Pred, DestMBB,
MIB)) {
I.eraseFromParent();
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
new file mode 100644
index 000000000000..abdacf8b129f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbnz-slt.mir
@@ -0,0 +1,151 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test that we can produce a TBNZ when we have a slt compare against 0.
+#
+# The bit tested should be the size of the test register minus 1.
+#
+
+...
+---
+name: tbnzx_slt
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: tbnzx_slt
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: TBNZX %copy, 63, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %cmp:gpr(s32) = G_ICMP intpred(slt), %copy(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: tbnzw_slt
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: tbnzw_slt
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: TBNZW %copy, 31, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %zero:gpr(s32) = G_CONSTANT i32 0
+ %cmp:gpr(s32) = G_ICMP intpred(slt), %copy(s32), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: no_tbnz_not_zero
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: no_tbnz_not_zero
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr32sp = COPY $w0
+ ; CHECK: $wzr = SUBSWri %copy, 1, 0, implicit-def $nzcv
+ ; CHECK: Bcc 11, %bb.1, implicit $nzcv
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %one:gpr(s32) = G_CONSTANT i32 1
+ %cmp:gpr(s32) = G_ICMP intpred(slt), %copy(s32), %one
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_fold_and
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_and
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: $xzr = ANDSXri %copy, 8000, implicit-def $nzcv
+ ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+ ; CHECK: TBNZW %cmp, 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %and:gpr(s64) = G_AND %copy, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(slt), %and(s64), %zero
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_commute
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_commute
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: %zero:gpr64 = COPY $xzr
+ ; CHECK: $xzr = SUBSXrr %zero, %copy, implicit-def $nzcv
+ ; CHECK: %cmp:gpr32 = CSINCWr $wzr, $wzr, 10, implicit $nzcv
+ ; CHECK: TBNZW %cmp, 0, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %zero:gpr(s64) = G_CONSTANT i64 0
+ %cmp:gpr(s32) = G_ICMP intpred(slt), %zero, %copy(s64)
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir
new file mode 100644
index 000000000000..fe1fd9385e61
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/tbz-sgt.mir
@@ -0,0 +1,151 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+#
+# Test that we can produce a tbz when we have a sgt compare against -1.
+#
+# The bit tested should be the size of the test register minus 1.
+#
+
+...
+---
+name: tbzx_sgt
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: tbzx_sgt
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: TBZX %copy, 63, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %negative_one:gpr(s64) = G_CONSTANT i64 -1
+ %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s64), %negative_one
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: tbzw_sgt
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: tbzw_sgt
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr32 = COPY $w0
+ ; CHECK: TBZW %copy, 31, %bb.1
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %negative_one:gpr(s32) = G_CONSTANT i32 -1
+ %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s32), %negative_one
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: no_tbz_not_negative_one
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: no_tbz_not_negative_one
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr32sp = COPY $w0
+ ; CHECK: $wzr = SUBSWri %copy, 1, 0, implicit-def $nzcv
+ ; CHECK: Bcc 12, %bb.1, implicit $nzcv
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s32) = COPY $w0
+ %one:gpr(s32) = G_CONSTANT i32 1
+ %cmp:gpr(s32) = G_ICMP intpred(sgt), %copy(s32), %one
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_fold_and
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_fold_and
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: %negative_one:gpr64 = MOVi64imm -1
+ ; CHECK: %and:gpr64common = ANDXri %copy, 8000
+ ; CHECK: $xzr = SUBSXrr %and, %negative_one, implicit-def $nzcv
+ ; CHECK: Bcc 12, %bb.1, implicit $nzcv
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %bit:gpr(s64) = G_CONSTANT i64 8
+ %negative_one:gpr(s64) = G_CONSTANT i64 -1
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %and:gpr(s64) = G_AND %copy, %bit
+ %cmp:gpr(s32) = G_ICMP intpred(sgt), %and(s64), %negative_one
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
+
+...
+---
+name: dont_commute
+alignment: 4
+legalized: true
+regBankSelected: true
+body: |
+ ; CHECK-LABEL: name: dont_commute
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.0(0x40000000), %bb.1(0x40000000)
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: %negative_one:gpr64 = MOVi64imm -1
+ ; CHECK: $xzr = SUBSXrr %negative_one, %copy, implicit-def $nzcv
+ ; CHECK: Bcc 12, %bb.1, implicit $nzcv
+ ; CHECK: B %bb.0
+ ; CHECK: bb.1:
+ ; CHECK: RET_ReallyLR
+ bb.0:
+ successors: %bb.0, %bb.1
+ liveins: $x0
+ %copy:gpr(s64) = COPY $x0
+ %negative_one:gpr(s64) = G_CONSTANT i64 -1
+ %cmp:gpr(s32) = G_ICMP intpred(sgt), %negative_one, %copy(s64)
+ %cmp_trunc:gpr(s1) = G_TRUNC %cmp(s32)
+ G_BRCOND %cmp_trunc(s1), %bb.1
+ G_BR %bb.0
+ bb.1:
+ RET_ReallyLR
More information about the llvm-commits
mailing list