[llvm] [GlobalISel] Combine (X >> C) << C to X & ((-1 >> C) << C) (PR #114821)
Dávid Ferenc Szabó via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 4 08:04:23 PST 2024
https://github.com/dfszabo created https://github.com/llvm/llvm-project/pull/114821
Clearing the lower bits of a value is usually done with a pair of shifts, but most targets support such mask values for bitwise AND instructions, so combining the shifts into an AND is beneficial.
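As a quick standalone illustration (not part of the patch), the identity behind
the combine can be checked in plain C++, with the mask built exactly the way
the combine builds it:

  // For unsigned X and a shift amount C below the bit width,
  // (X >> C) << C == X & K, where K = (-1 >> C) << C (low C bits cleared).
  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t C = 0; C < 32; ++C) {
      uint32_t Mask = (UINT32_MAX >> C) << C; // same mask the combine emits
      for (uint32_t X : {0u, 1u, 0x2Au, 0xDEADBEEFu, UINT32_MAX})
        assert(((X >> C) << C) == (X & Mask));
    }
  }

The two forms are equivalent because the left shift reinserts zeros into the
low C bits, so the shift pair acts as a mask; the AND form exposes a plain
constant to later combines.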
From d5bf5bb9d3008b10cfa4cb04f14955c74bd6c13c Mon Sep 17 00:00:00 2001
From: Dávid Ferenc Szabó <szabodavidferenc at gmail.com>
Date: Mon, 4 Nov 2024 16:57:15 +0100
Subject: [PATCH] [GlobalISel] Combine (X >> C) << C to X & ((-1 >> C) << C)
Clearing the lower bits of a value is usually done with a pair of shifts, but most targets support such mask values for bitwise AND instructions, so combining the shifts into an AND is beneficial.
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 1 +
.../CodeGen/GlobalISel/GenericMachineInstrs.h | 11 ++
.../include/llvm/Target/GlobalISel/Combine.td | 10 +-
.../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 33 ++++
.../GlobalISel/combine-shifts-to-and.mir | 142 ++++++++++++++++++
5 files changed, 196 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..eb00648b45356b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -315,6 +315,7 @@ class CombinerHelper {
void applyShiftOfShiftedLogic(MachineInstr &MI,
ShiftOfShiftedLogic &MatchInfo);
+ bool matchLsbClearByShifts(MachineInstr &MI, BuildFnTy &MatchInfo);
bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo);
/// Transform a multiply by a power-of-2 value to a left shift.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index cd7ebcf54c9e1e..3e072c9f61b17c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -933,6 +933,17 @@ class GShl : public GenericMachineInstr {
};
};
+/// Represents a logical shift right.
+class GLshr : public GenericMachineInstr {
+public:
+ Register getSrcReg() const { return getOperand(1).getReg(); }
+ Register getShiftReg() const { return getOperand(2).getReg(); }
+
+ static bool classof(const MachineInstr *MI) {
+ return MI->getOpcode() == TargetOpcode::G_LSHR;
+ };
+};
+
/// Represents a threeway compare.
class GSUCmp : public GenericMachineInstr {
public:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..03da7058d715df 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -365,6 +365,13 @@ def bitreverse_lshr : GICombineRule<
MRI.getType(${amt}.getReg())}}); }]),
(apply (G_SHL $d, $val, $amt))>;
+def lsb_bits_clearing_by_shifts_to_and: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_LSHR $d, $src, $amt1),
+ (G_SHL $root, $d, $amt2):$root,
+ [{ return Helper.matchLsbClearByShifts(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
// Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
def commute_shift : GICombineRule<
@@ -1930,7 +1937,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
not_cmp_fold, opt_brcond_by_inverting_cond,
const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
- div_rem_to_divrem, funnel_shift_combines, bitreverse_shift, commute_shift,
+ div_rem_to_divrem, funnel_shift_combines, bitreverse_shift,
+ lsb_bits_clearing_by_shifts_to_and, commute_shift,
form_bitfield_extract, constant_fold_binops, constant_fold_fma,
constant_fold_cast_op, fabs_fneg_fold,
intdiv_combines, mulh_combines, redundant_neg_operands,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 7c1bda2163b7a0..596c6a16e93244 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1983,6 +1983,39 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
MI.eraseFromParent();
}
+bool CombinerHelper::matchLsbClearByShifts(MachineInstr &MI,
+ BuildFnTy &MatchInfo) {
+ // fold (A >> C) << C to A & K, where K = (-1 >> C) << C
+ const GShl *Shl = cast<GShl>(&MI);
+ GLshr *Lshr = cast<GLshr>(MRI.getVRegDef(Shl->getSrcReg()));
+
+ if (!MRI.hasOneNonDBGUse(Lshr->getReg(0)))
+ return false;
+
+ APInt C1, C2;
+ if (!mi_match(Shl->getShiftReg(), MRI, m_ICstOrSplat(C1)) ||
+ !mi_match(Lshr->getShiftReg(), MRI, m_ICstOrSplat(C2)))
+ return false;
+
+ if (C2.ne(C1))
+ return false;
+
+ APInt C = C1;
+ C.setAllBits();
+ C = C.lshr(C1);
+ C = C.shl(C1);
+
+ Register Dst = Shl->getReg(0);
+ LLT DstTy = MRI.getType(Dst);
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ auto Const = B.buildConstant(DstTy, C);
+ B.buildAnd(Dst, Lshr->getSrcReg(), Const);
+ };
+
+ return true;
+}
+
bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
// Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir
new file mode 100644
index 00000000000000..71236fb980e364
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: test_single_g_constant
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: test_single_g_constant
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %2:_(s32) = G_CONSTANT i32 1
+ %3:_(s32) = G_LSHR %1, %2
+ %4:_(s32) = G_SHL %3, %2(s32)
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_double_g_constant
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: test_double_g_constant
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %2:_(s32) = G_CONSTANT i32 1
+ %3:_(s32) = G_CONSTANT i32 1
+ %4:_(s32) = G_LSHR %1, %2
+ %5:_(s32) = G_SHL %4, %3(s32)
+ $w0 = COPY %5
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_const_2
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: test_const_2
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %2:_(s32) = G_CONSTANT i32 2
+ %3:_(s32) = G_LSHR %1, %2
+ %4:_(s32) = G_SHL %3, %2(s32)
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_const_3
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: test_const_3
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %2:_(s32) = G_CONSTANT i32 3
+ %3:_(s32) = G_LSHR %1, %2
+ %4:_(s32) = G_SHL %3, %2(s32)
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_const_4
+body: |
+ bb.1:
+ liveins: $w0
+
+ ; CHECK-LABEL: name: test_const_4
+ ; CHECK: liveins: $w0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %1:_(s32) = COPY $w0
+ %2:_(s32) = G_CONSTANT i32 4
+ %3:_(s32) = G_LSHR %1, %2
+ %4:_(s32) = G_SHL %3, %2(s32)
+ $w0 = COPY %4
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: test_vector
+body: |
+ bb.1:
+ liveins: $w1, $x0
+
+ ; CHECK-LABEL: name: test_vector
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK-NEXT: %xvec:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY1]](s32), [[COPY1]](s32), [[COPY1]](s32)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND %xvec, [[BUILD_VECTOR]]
+ ; CHECK-NEXT: G_STORE [[AND]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>))
+ ; CHECK-NEXT: RET_ReallyLR
+ %0:_(p0) = COPY $x0
+ %1:_(s32) = COPY $w1
+ %xvec:_(<4 x s32>) = G_BUILD_VECTOR %1, %1, %1, %1
+ %2:_(s32) = G_CONSTANT i32 2
+ %veccst:_(<4 x s32>) = G_BUILD_VECTOR %2, %2, %2, %2
+ %3:_(<4 x s32>) = G_LSHR %xvec, %veccst
+ %5:_(<4 x s32>) = G_SHL %3, %veccst
+ G_STORE %5(<4 x s32>), %0(p0) :: (store (<4 x s32>))
+ RET_ReallyLR
+
+...
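For reference, a minimal standalone sketch (not part of the patch, and the
helper name here is purely illustrative) of the mask computation performed in
matchLsbClearByShifts, using LLVM's llvm::APInt:

  #include "llvm/ADT/APInt.h"

  // Build K = (-1 >> C) << C at the given bit width: start from all ones;
  // a logical shift right by C followed by a shift left by C clears the
  // low C bits.
  static llvm::APInt buildLowBitsClearMask(unsigned BitWidth, unsigned C) {
    llvm::APInt K = llvm::APInt::getAllOnes(BitWidth);
    return K.lshr(C).shl(C);
  }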