[llvm] [GlobalISel] Combine (X >> C) << C to X & ((-1 >> C) << C) (PR #114821)

Dávid Ferenc Szabó via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 4 08:04:23 PST 2024


https://github.com/dfszabo created https://github.com/llvm/llvm-project/pull/114821

Clearing the low bits of a value is usually done with a pair of shifts, but most targets support such masks directly in their bitwise AND instructions, so combining the shift pair into an AND is beneficial.
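
A quick standalone check of the identity (illustrative only, not part of the patch):

  // Verifies (x >> c) << c == x & ((-1 >> c) << c) for unsigned 32-bit x.
  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t c = 0; c < 32; ++c) {
      uint32_t mask = (UINT32_MAX >> c) << c; // K = (-1 >> c) << c
      for (uint32_t x : {0u, 1u, 0x12345678u, UINT32_MAX}) {
        assert(((x >> c) << c) == (x & mask));
      }
    }
  }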

From d5bf5bb9d3008b10cfa4cb04f14955c74bd6c13c Mon Sep 17 00:00:00 2001
From: Dávid Ferenc Szabó <szabodavidferenc at gmail.com>
Date: Mon, 4 Nov 2024 16:57:15 +0100
Subject: [PATCH] [GlobalISel] Combine (X >> C) << C to X & ((-1 >> C) << C)

Clearing the low bits of a value is usually done with a pair of shifts, but most targets support such masks directly in their bitwise AND instructions, so combining the shift pair into an AND is beneficial.
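For example, with C = 2 on s32 the mask is (-1 >> 2) << 2 = 0xFFFFFFFC, i.e. -4, so (x >> 2) << 2 becomes x & -4 (see the test_const_2 case below).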
---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |   1 +
 .../CodeGen/GlobalISel/GenericMachineInstrs.h |  11 ++
 .../include/llvm/Target/GlobalISel/Combine.td |  10 +-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp |  33 ++++
 .../GlobalISel/combine-shifts-to-and.mir      | 142 ++++++++++++++++++
 5 files changed, 196 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9240a3c3127eb4..eb00648b45356b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -315,6 +315,7 @@ class CombinerHelper {
   void applyShiftOfShiftedLogic(MachineInstr &MI,
                                 ShiftOfShiftedLogic &MatchInfo);
 
+  bool matchLsbClearByShifts(MachineInstr &MI, BuildFnTy &MatchInfo);
   bool matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo);
 
   /// Transform a multiply by a power-of-2 value to a left shift.
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
index cd7ebcf54c9e1e..3e072c9f61b17c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/GenericMachineInstrs.h
@@ -933,6 +933,17 @@ class GShl : public GenericMachineInstr {
   };
 };
 
+/// Represents a logical shift right.
+class GLshr : public GenericMachineInstr {
+public:
+  Register getSrcReg() const { return getOperand(1).getReg(); }
+  Register getShiftReg() const { return getOperand(2).getReg(); }
+
+  static bool classof(const MachineInstr *MI) {
+    return MI->getOpcode() == TargetOpcode::G_LSHR;
+  };
+};
+
 /// Represents a threeway compare.
 class GSUCmp : public GenericMachineInstr {
 public:
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ead4149fc11068..03da7058d715df 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -365,6 +365,13 @@ def bitreverse_lshr : GICombineRule<
                                                     MRI.getType(${amt}.getReg())}}); }]),
   (apply (G_SHL $d, $val, $amt))>;
 
+def lsb_bits_clearing_by_shifts_to_and : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (G_LSHR $d, $src, $amt1),
+         (G_SHL $root, $d, $amt2):$root,
+   [{ return Helper.matchLsbClearByShifts(*${root}, ${matchinfo}); }]),
+   (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
 // Combine (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
 def commute_shift : GICombineRule<
@@ -1930,7 +1937,8 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     not_cmp_fold, opt_brcond_by_inverting_cond,
     const_combines, xor_of_and_with_same_reg, ptr_add_with_zero,
     shift_immed_chain, shift_of_shifted_logic_chain, load_or_combine,
-    div_rem_to_divrem, funnel_shift_combines, bitreverse_shift, commute_shift,
+    div_rem_to_divrem, funnel_shift_combines, bitreverse_shift,
+    lsb_bits_clearing_by_shifts_to_and, commute_shift,
     form_bitfield_extract, constant_fold_binops, constant_fold_fma,
     constant_fold_cast_op, fabs_fneg_fold,
     intdiv_combines, mulh_combines, redundant_neg_operands,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 7c1bda2163b7a0..596c6a16e93244 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1983,6 +1983,39 @@ void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
   MI.eraseFromParent();
 }
 
+bool CombinerHelper::matchLsbClearByShifts(MachineInstr &MI,
+                                           BuildFnTy &MatchInfo) {
+  // fold (A >> C) << C to A & K, where K = (-1 >> C) << C
+  const GShl *Shl = cast<GShl>(&MI);
+  GLshr *Lshr = cast<GLshr>(MRI.getVRegDef(Shl->getSrcReg()));
+
+  if (!MRI.hasOneNonDBGUse(Lshr->getReg(0)))
+    return false;
+
+  APInt C1, C2;
+  if (!mi_match(Shl->getShiftReg(), MRI, m_ICstOrSplat(C1)) ||
+      !mi_match(Lshr->getShiftReg(), MRI, m_ICstOrSplat(C2)))
+    return false;
+
+  if (C2.ne(C1))
+    return false;
+
+  Register Dst = Shl->getReg(0);
+  LLT DstTy = MRI.getType(Dst);
+
+  // Build the mask K = (-1 >> C) << C at the destination's scalar width.
+  APInt C = APInt::getAllOnes(DstTy.getScalarSizeInBits());
+  C = C.lshr(C1);
+  C = C.shl(C1);
+
+  MatchInfo = [=](MachineIRBuilder &B) {
+    auto Const = B.buildConstant(DstTy, C);
+    B.buildAnd(Dst, Lshr->getSrcReg(), Const);
+  };
+
+  return true;
+}
+
 bool CombinerHelper::matchCommuteShift(MachineInstr &MI, BuildFnTy &MatchInfo) {
   assert(MI.getOpcode() == TargetOpcode::G_SHL && "Expected G_SHL");
   // Combine (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir
new file mode 100644
index 00000000000000..71236fb980e364
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shifts-to-and.mir
@@ -0,0 +1,142 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            test_single_g_constant
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_single_g_constant
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:_(s32) = COPY $w0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_LSHR %1, %2
+    %4:_(s32) = G_SHL %3, %2(s32)
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_double_g_constant
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_double_g_constant
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:_(s32) = COPY $w0
+    %2:_(s32) = G_CONSTANT i32 1
+    %3:_(s32) = G_CONSTANT i32 1
+    %4:_(s32) = G_LSHR %1, %2
+    %5:_(s32) = G_SHL %4, %3(s32)
+    $w0 = COPY %5
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_const_2
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_const_2
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:_(s32) = COPY $w0
+    %2:_(s32) = G_CONSTANT i32 2
+    %3:_(s32) = G_LSHR %1, %2
+    %4:_(s32) = G_SHL %3, %2(s32)
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_const_3
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_const_3
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -8
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:_(s32) = COPY $w0
+    %2:_(s32) = G_CONSTANT i32 3
+    %3:_(s32) = G_LSHR %1, %2
+    %4:_(s32) = G_SHL %3, %2(s32)
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_const_4
+body:             |
+  bb.1:
+    liveins: $w0
+
+    ; CHECK-LABEL: name: test_const_4
+    ; CHECK: liveins: $w0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -16
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK-NEXT: $w0 = COPY [[AND]](s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %1:_(s32) = COPY $w0
+    %2:_(s32) = G_CONSTANT i32 4
+    %3:_(s32) = G_LSHR %1, %2
+    %4:_(s32) = G_SHL %3, %2(s32)
+    $w0 = COPY %4
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            test_vector
+body:             |
+  bb.1:
+    liveins: $w1, $x0
+
+    ; CHECK-LABEL: name: test_vector
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+    ; CHECK-NEXT: %xvec:_(<4 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY1]](s32), [[COPY1]](s32), [[COPY1]](s32)
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -4
+    ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+    ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s32>) = G_AND %xvec, [[BUILD_VECTOR]]
+    ; CHECK-NEXT: G_STORE [[AND]](<4 x s32>), [[COPY]](p0) :: (store (<4 x s32>))
+    ; CHECK-NEXT: RET_ReallyLR
+    %0:_(p0) = COPY $x0
+    %1:_(s32) = COPY $w1
+    %xvec:_(<4 x s32>) = G_BUILD_VECTOR %1, %1, %1, %1
+    %2:_(s32) = G_CONSTANT i32 2
+    %veccst:_(<4 x s32>) = G_BUILD_VECTOR %2, %2, %2, %2
+    %3:_(<4 x s32>) = G_LSHR %xvec, %veccst
+    %5:_(<4 x s32>) = G_SHL %3, %veccst
+    G_STORE %5(<4 x s32>), %0(p0) :: (store (<4 x s32>))
+    RET_ReallyLR
+
+...


