[llvm] be62b3b - [AArch64][GlobalISel] Add a combine to fold away truncate in: G_ICMP EQ/NE (G_TRUNC(v), 0)
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 28 16:29:31 PST 2021
Author: Amara Emerson
Date: 2021-01-28T16:29:14-08:00
New Revision: be62b3ba347d8d9e7ada364ddc9e16d7ddd05b8e
URL: https://github.com/llvm/llvm-project/commit/be62b3ba347d8d9e7ada364ddc9e16d7ddd05b8e
DIFF: https://github.com/llvm/llvm-project/commit/be62b3ba347d8d9e7ada364ddc9e16d7ddd05b8e.diff
LOG: [AArch64][GlobalISel] Add a combine to fold away truncate in: G_ICMP EQ/NE (G_TRUNC(v), 0)
We can do this optimization when we determine that testing the wide value
against zero with an eq/ne predicate gives the same result as testing the
truncated (lower) bits, i.e. when the bits removed by the truncate are all
sign bits.
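For illustration, a rough before/after sketch of the rewrite (the register
names here are invented for the example):

    ; before: compare the truncated value against a narrow zero
    %trunc:_(s32) = G_TRUNC %wide(s64)
    %zero:_(s32) = G_CONSTANT i32 0
    %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero
    ; after: the truncated-away bits of %wide are all sign bits, so we can
    ; compare the wide value against a wide zero instead
    %zero64:_(s64) = G_CONSTANT i64 0
    %cmp:_(s1) = G_ICMP intpred(ne), %wide(s64), %zero64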
Differential Revision: https://reviews.llvm.org/D95645
Added:
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
Modified:
llvm/lib/Target/AArch64/AArch64Combine.td
llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index b1e714653f46..980d61dde1d7 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -17,9 +17,17 @@ def fconstant_to_constant : GICombineRule<
[{ return matchFConstantToConstant(*${root}, MRI); }]),
(apply [{ applyFConstantToConstant(*${root}); }])>;
+def icmp_redundant_trunc_matchdata : GIDefMatchData<"Register">;
+def icmp_redundant_trunc : GICombineRule<
+ (defs root:$root, icmp_redundant_trunc_matchdata:$matchinfo),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return matchICmpRedundantTrunc(*${root}, MRI, Helper.getKnownBits(), ${matchinfo}); }]),
+ (apply [{ applyICmpRedundantTrunc(*${root}, MRI, B, Observer, ${matchinfo}); }])>;
+
def AArch64PreLegalizerCombinerHelper: GICombinerHelper<
"AArch64GenPreLegalizerCombinerHelper", [all_combines,
- fconstant_to_constant]> {
+ fconstant_to_constant,
+ icmp_redundant_trunc]> {
let DisableRuleOption = "aarch64prelegalizercombiner-disable-rule";
let StateClass = "AArch64PreLegalizerCombinerHelperState";
let AdditionalArguments = [];
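A note on the wiring above: GIDefMatchData<"Register"> declares a per-rule
"matchdata" slot (here, the wide register found by the match) that the
generated combiner threads from the match C++ fragment into the apply
fragment, and wip_match_opcode gates the rule on G_ICMP before the C++
predicate runs.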
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
index 5f9b64e274b3..a1cc0b42eb5a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp
@@ -17,9 +17,13 @@
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
@@ -53,6 +57,56 @@ static void applyFConstantToConstant(MachineInstr &MI) {
MI.eraseFromParent();
}
+/// Try to match a G_ICMP of a G_TRUNC against zero, where the truncated bits
+/// are all sign bits. In that case, we can transform the G_ICMP to directly
+/// compare the wide value against zero.
+static bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ GISelKnownBits *KB, Register &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP && KB);
+
+ auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
+ if (!ICmpInst::isEquality(Pred))
+ return false;
+
+ Register LHS = MI.getOperand(2).getReg();
+ LLT LHSTy = MRI.getType(LHS);
+ if (!LHSTy.isScalar())
+ return false;
+
+ Register RHS = MI.getOperand(3).getReg();
+ Register WideReg;
+
+ if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
+ !mi_match(RHS, MRI, m_SpecificICst(0)))
+ return false;
+
+ LLT WideTy = MRI.getType(WideReg);
+ if (KB->computeNumSignBits(WideReg) <=
+ WideTy.getSizeInBits() - LHSTy.getSizeInBits())
+ return false;
+
+ MatchInfo = WideReg;
+ return true;
+}
+
+static bool applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &Builder,
+ GISelChangeObserver &Observer,
+ Register &WideReg) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+
+ LLT WideTy = MRI.getType(WideReg);
+  // We're going to directly use the wide register as the LHS, and then use an
+  // equivalently sized zero for the RHS.
+ Builder.setInstrAndDebugLoc(MI);
+ auto WideZero = Builder.buildConstant(WideTy, 0);
+ Observer.changingInstr(MI);
+ MI.getOperand(2).setReg(WideReg);
+ MI.getOperand(3).setReg(WideZero.getReg(0));
+ Observer.changedInstr(MI);
+ return true;
+}
+
class AArch64PreLegalizerCombinerHelperState {
protected:
CombinerHelper &Helper;
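To make the sign-bit check concrete with the numbers from the tests below: a
G_SEXTLOAD of 4 bytes into an s64 leaves 33 sign bits (the replicated copies
in bits 32-63 plus the sign bit itself), so computeNumSignBits returns 33.
Truncating to s32 discards 64 - 32 = 32 bits, and 33 > 32, so the combine
fires. Truncating the same load to s16 would need more than 64 - 16 = 48 sign
bits, so that case is rejected.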
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
new file mode 100644
index 000000000000..f89d8e31fa48
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-icmp-redundant-trunc.mir
@@ -0,0 +1,107 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-ios -run-pass=aarch64-prelegalizer-combiner %s -o - -verify-machineinstrs | FileCheck %s
+
+# These tests check the optimization that removes the G_TRUNC when we can determine it is redundant.
+---
+name: icmp_trunc_sextload
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: icmp_trunc_sextload
+ ; CHECK: liveins: $x0
+ ; CHECK: %v:_(p0) = COPY $x0
+ ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %load(s64), [[C]]
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+ ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %v:_(p0) = COPY $x0
+ %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+ %trunc:_(s32) = G_TRUNC %load(s64)
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s32), %zero
+ %5:_(s32) = G_ANYEXT %cmp
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: icmp_trunc_sextload_eq
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: icmp_trunc_sextload_eq
+ ; CHECK: liveins: $x0
+ ; CHECK: %v:_(p0) = COPY $x0
+ ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %load(s64), [[C]]
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+ ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %v:_(p0) = COPY $x0
+ %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+ %trunc:_(s32) = G_TRUNC %load(s64)
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(eq), %trunc(s32), %zero
+ %5:_(s32) = G_ANYEXT %cmp
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: icmp_trunc_sextload_wrongpred
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: icmp_trunc_sextload_wrongpred
+ ; CHECK: liveins: $x0
+ ; CHECK: %v:_(p0) = COPY $x0
+ ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+ ; CHECK: %trunc:_(s32) = G_TRUNC %load(s64)
+ ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+ ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %v:_(p0) = COPY $x0
+ %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+ %trunc:_(s32) = G_TRUNC %load(s64)
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(slt), %trunc(s32), %zero
+ %5:_(s32) = G_ANYEXT %cmp
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: icmp_trunc_sextload_extend_mismatch
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: icmp_trunc_sextload_extend_mismatch
+ ; CHECK: liveins: $x0
+ ; CHECK: %v:_(p0) = COPY $x0
+ ; CHECK: %load:_(s64) = G_SEXTLOAD %v(p0) :: (load 4)
+ ; CHECK: %trunc:_(s16) = G_TRUNC %load(s64)
+ ; CHECK: %zero:_(s16) = G_CONSTANT i16 0
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT %cmp(s1)
+ ; CHECK: $w0 = COPY [[ANYEXT]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %v:_(p0) = COPY $x0
+ %load:_(s64) = G_SEXTLOAD %v:_(p0) :: (load 4)
+ %trunc:_(s16) = G_TRUNC %load(s64)
+ %zero:_(s16) = G_CONSTANT i16 0
+ %cmp:_(s1) = G_ICMP intpred(ne), %trunc(s16), %zero
+ %5:_(s32) = G_ANYEXT %cmp
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+...
+
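Of the four tests, the first two (ne and eq) show the combine firing: the
G_TRUNC and the s32 zero disappear, and the compare is rewritten against the
s64 %load with an i64 0. The last two are negative tests: slt is not an
equality predicate, and truncating to s16 discards bits that are not all sign
bits, so in both cases the original sequence is left untouched.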