[llvm] 844d8e0 - [GlobalISel] Combine icmp eq/ne x, 0/1 -> x when x == 0 or 1
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 2 15:05:50 PDT 2021
Author: Jessica Paquette
Date: 2021-09-02T15:05:31-07:00
New Revision: 844d8e0337560bd73b5a78fd8ff162b1b262b46f
URL: https://github.com/llvm/llvm-project/commit/844d8e0337560bd73b5a78fd8ff162b1b262b46f
DIFF: https://github.com/llvm/llvm-project/commit/844d8e0337560bd73b5a78fd8ff162b1b262b46f.diff
LOG: [GlobalISel] Combine icmp eq/ne x, 0/1 -> x when x == 0 or 1
This adds the following combines:
```
x = ... 0 or 1
c = icmp eq x, 1
->
c = x
```
and
```
x = ... 0 or 1
c = icmp ne x, 0
->
c = x
```
Both apply only when the target's true value for the relevant types is 1.
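For illustration, here is a hypothetical C++ function exhibiting the pattern (a sketch; the godbolt link below shows the original motivating case):
```
// The mask makes (x & 1) known to be 0 or 1, so comparing it against 1
// (or against 0 with ne) is redundant: the boolean result is just the
// masked value itself.
bool isOdd(unsigned x) {
  return (x & 1) == 1; // icmp eq (and x, 1), 1 --> and x, 1
}
```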
This showed up in the following situation:
https://godbolt.org/z/M5jKexWTW
SDAG currently supports the `ne` case, but not the `eq` case. This could probably be generalized further; that is left for future work.
This gives some minor code size improvements across the board on CTMark at
-Os for AArch64. (0.1% for 7zip and pairlocalalign in particular.)
Differential Revision: https://reviews.llvm.org/D109130
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index d892a7525a6d3..8bc89cbc40bb5 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -553,6 +553,12 @@ class CombinerHelper {
/// or false constant based off of KnownBits information.
bool matchICmpToTrueFalseKnownBits(MachineInstr &MI, int64_t &MatchInfo);
+ /// \returns true if a G_ICMP \p MI can be replaced with its LHS based off of
+ /// KnownBits information.
+ bool
+ matchICmpToLHSKnownBits(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
bool matchBitfieldExtractFromSExtInReg(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo);
/// Match: and (lshr x, cst), mask -> ubfx x, cst, width
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e65073a1d28d0..1808aa6e6e66a 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -644,6 +644,12 @@ def icmp_to_true_false_known_bits : GICombineRule<
[{ return Helper.matchICmpToTrueFalseKnownBits(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def icmp_to_lhs_known_bits : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$info),
+ (match (wip_match_opcode G_ICMP):$root,
+ [{ return Helper.matchICmpToLHSKnownBits(*${root}, ${info}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${info}); }])>;
+
def bitfield_extract_from_and : GICombineRule<
(defs root:$root, build_fn_matchinfo:$info),
(match (wip_match_opcode G_AND):$root,
@@ -702,7 +708,7 @@ def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits]>;
+ zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
narrow_binop_feeding_and]>;
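The new rule follows the existing build_fn_matchinfo pattern: the match step fills a std::function, and the apply step runs it with a MachineIRBuilder to emit the replacement. Schematically (a sketch of the contract, not the generated combiner code):
```
// Sketch: how a build_fn_matchinfo rule is wired together.
std::function<void(MachineIRBuilder &)> MatchInfo;
if (Helper.matchICmpToLHSKnownBits(MI, MatchInfo)) // match step fills MatchInfo
  Helper.applyBuildFn(MI, MatchInfo);              // apply step invokes it
```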
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 34322cb22d25c..cd363d7d449c6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4118,6 +4118,48 @@ bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
return true;
}
+bool CombinerHelper::matchICmpToLHSKnownBits(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_ICMP);
+ // Given:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP ne %x, 0
+ //
+ // Or:
+ //
+ // %x = G_WHATEVER (... x is known to be 0 or 1 ...)
+ // %cmp = G_ICMP eq %x, 1
+ //
+ // We can replace %cmp with %x assuming true is 1 on the target.
+ auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ if (!CmpInst::isEquality(Pred))
+ return false;
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ if (getICmpTrueVal(getTargetLowering(), DstTy.isVector(),
+ /* IsFP = */ false) != 1)
+ return false;
+ int64_t OneOrZero = Pred == CmpInst::ICMP_EQ;
+ if (!mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(OneOrZero)))
+ return false;
+ Register LHS = MI.getOperand(2).getReg();
+ auto KnownLHS = KB->getKnownBits(LHS);
+ if (KnownLHS.getMinValue() != 0 || KnownLHS.getMaxValue() != 1)
+ return false;
+ // Make sure replacing Dst with the LHS is a legal operation.
+ LLT LHSTy = MRI.getType(LHS);
+ unsigned LHSSize = LHSTy.getSizeInBits();
+ unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Op = TargetOpcode::COPY;
+ if (DstSize != LHSSize)
+ Op = DstSize < LHSSize ? TargetOpcode::G_TRUNC : TargetOpcode::G_ZEXT;
+ if (!isLegalOrBeforeLegalizer({Op, {DstTy, LHSTy}}))
+ return false;
+ MatchInfo = [=](MachineIRBuilder &B) { B.buildInstr(Op, {Dst}, {LHS}); };
+ return true;
+}
+
/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
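The key guard above is the known-bits range check on the LHS. A minimal standalone sketch of the same test, assuming LLVM's KnownBits API from llvm/Support/KnownBits.h:
```
#include "llvm/Support/KnownBits.h"

// Returns true iff known-bits analysis proves the value is 0 or 1.
// E.g. for a value defined by G_AND %a, 1, every bit above bit 0 is
// known zero, so the unsigned range [Min, Max] collapses to [0, 1].
static bool isKnownZeroOrOne(const llvm::KnownBits &Known) {
  return Known.getMinValue() == 0 && Known.getMaxValue() == 1;
}
```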
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir
new file mode 100644
index 0000000000000..c9a648b893fa2
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-icmp-to-lhs-known-bits.mir
@@ -0,0 +1,229 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="icmp_to_lhs_known_bits" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+...
+---
+name: apply_ne
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; CHECK-LABEL: name: apply_ne
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one
+ ; CHECK: %cmp:_(s1) = G_TRUNC %known_zero_or_one(s32)
+ ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK: $w0 = COPY %ext(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %one:_(s32) = G_CONSTANT i32 1
+ %known_zero_or_one:_(s32) = G_AND %x, %one
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s1) = G_ICMP intpred(ne), %known_zero_or_one(s32), %zero
+ %ext:_(s32) = G_ZEXT %cmp(s1)
+ $w0 = COPY %ext(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: apply_eq
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; CHECK-LABEL: name: apply_eq
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one
+ ; CHECK: %cmp:_(s1) = G_TRUNC %known_zero_or_one(s32)
+ ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK: $w0 = COPY %ext(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %one:_(s32) = G_CONSTANT i32 1
+ %known_zero_or_one:_(s32) = G_AND %x, %one
+ %cmp:_(s1) = G_ICMP intpred(eq), %known_zero_or_one(s32), %one
+ %ext:_(s32) = G_ZEXT %cmp(s1)
+ $w0 = COPY %ext(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_wrong_cst_eq
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; Wrong constant on the RHS of the compare.
+
+ ; CHECK-LABEL: name: dont_apply_wrong_cst_eq
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one
+ ; CHECK: %wrong_cst:_(s32) = G_CONSTANT i32 10
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(eq), %known_zero_or_one(s32), %wrong_cst
+ ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK: $w0 = COPY %ext(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %one:_(s32) = G_CONSTANT i32 1
+ %known_zero_or_one:_(s32) = G_AND %x, %one
+ %wrong_cst:_(s32) = G_CONSTANT i32 10
+ %cmp:_(s1) = G_ICMP intpred(eq), %known_zero_or_one(s32), %wrong_cst
+ %ext:_(s32) = G_ZEXT %cmp(s1)
+ $w0 = COPY %ext(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_wrong_cst_ne
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; Wrong constant on the RHS of the compare.
+
+ ; CHECK-LABEL: name: dont_apply_wrong_cst_ne
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one
+ ; CHECK: %wrong_cst:_(s32) = G_CONSTANT i32 10
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(ne), %known_zero_or_one(s32), %wrong_cst
+ ; CHECK: %ext:_(s32) = G_ZEXT %cmp(s1)
+ ; CHECK: $w0 = COPY %ext(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %one:_(s32) = G_CONSTANT i32 1
+ %known_zero_or_one:_(s32) = G_AND %x, %one
+ %wrong_cst:_(s32) = G_CONSTANT i32 10
+ %cmp:_(s1) = G_ICMP intpred(ne), %known_zero_or_one(s32), %wrong_cst
+ %ext:_(s32) = G_ZEXT %cmp(s1)
+ $w0 = COPY %ext(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: dont_apply_vector
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0
+ ; True is -1 for vectors on AArch64 so we don't want to combine.
+
+ ; CHECK-LABEL: name: dont_apply_vector
+ ; CHECK: liveins: $x0
+ ; CHECK: %x:_(<2 x s32>) = COPY $x0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one(s32), %one(s32)
+ ; CHECK: %vec_and:_(<2 x s32>) = G_AND %x, %one_vec
+ ; CHECK: %zero:_(s32) = G_CONSTANT i32 0
+ ; CHECK: %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero(s32), %zero(s32)
+ ; CHECK: %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec
+ ; CHECK: %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp(<2 x s1>), %zero(s32)
+ ; CHECK: %ext:_(s32) = G_ZEXT %elt(s1)
+ ; CHECK: $w0 = COPY %ext(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(<2 x s32>) = COPY $x0
+ %one:_(s32) = G_CONSTANT i32 1
+ %one_vec:_(<2 x s32>) = G_BUILD_VECTOR %one, %one
+ %vec_and:_(<2 x s32>) = G_AND %x, %one_vec
+ %zero:_(s32) = G_CONSTANT i32 0
+ %zero_vec:_(<2 x s32>) = G_BUILD_VECTOR %zero, %zero
+ %cmp:_(<2 x s1>) = G_ICMP intpred(ne), %vec_and(<2 x s32>), %zero_vec
+ %elt:_(s1) = G_EXTRACT_VECTOR_ELT %cmp, %zero
+ %ext:_(s32) = G_ZEXT %elt(s1)
+ $w0 = COPY %ext(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: apply_no_zext_or_trunc
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; CHECK-LABEL: name: apply_no_zext_or_trunc
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one
+ ; CHECK: %cmp:_(s32) = COPY %known_zero_or_one(s32)
+ ; CHECK: $w0 = COPY %cmp(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(s32) = COPY $w0
+ %one:_(s32) = G_CONSTANT i32 1
+ %known_zero_or_one:_(s32) = G_AND %x, %one
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s32) = G_ICMP intpred(ne), %known_zero_or_one(s32), %zero
+ $w0 = COPY %cmp(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: apply_wide_cmp
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; CHECK-LABEL: name: apply_wide_cmp
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s64) = COPY $x0
+ ; CHECK: %one:_(s64) = G_CONSTANT i64 1
+ ; CHECK: %known_zero_or_one:_(s64) = G_AND %x, %one
+ ; CHECK: %cmp:_(s64) = COPY %known_zero_or_one(s64)
+ ; CHECK: %trunc:_(s32) = G_TRUNC %cmp(s64)
+ ; CHECK: $w0 = COPY %trunc(s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %x:_(s64) = COPY $x0
+ %one:_(s64) = G_CONSTANT i64 1
+ %known_zero_or_one:_(s64) = G_AND %x, %one
+ %zero:_(s64) = G_CONSTANT i64 0
+ %cmp:_(s64) = G_ICMP intpred(ne), %known_zero_or_one(s64), %zero
+ %trunc:_(s32) = G_TRUNC %cmp
+ $w0 = COPY %trunc(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: apply_narrow_lhs
+alignment: 4
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w0
+ ; CHECK-LABEL: name: apply_narrow_lhs
+ ; CHECK: liveins: $w0
+ ; CHECK: %x:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 1
+ ; CHECK: %known_zero_or_one:_(s32) = G_AND %x, %one
+ ; CHECK: %cmp:_(s64) = G_ZEXT %known_zero_or_one(s32)
+ ; CHECK: $x0 = COPY %cmp(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ %x:_(s32) = COPY $w0
+ %one:_(s32) = G_CONSTANT i32 1
+ %known_zero_or_one:_(s32) = G_AND %x, %one
+ %zero:_(s32) = G_CONSTANT i32 0
+ %cmp:_(s64) = G_ICMP intpred(ne), %known_zero_or_one(s32), %zero
+ $x0 = COPY %cmp(s64)
+ RET_ReallyLR implicit $x0