[llvm] 15a24e1 - [GlobalISel] Combine mulo x, 2 -> addo x, x

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 28 16:59:59 PDT 2021


Author: Jessica Paquette
Date: 2021-09-28T16:59:43-07:00
New Revision: 15a24e1fdbc446bc3d0fa0bf6c2a1aa44281cc75

URL: https://github.com/llvm/llvm-project/commit/15a24e1fdbc446bc3d0fa0bf6c2a1aa44281cc75
DIFF: https://github.com/llvm/llvm-project/commit/15a24e1fdbc446bc3d0fa0bf6c2a1aa44281cc75.diff

LOG: [GlobalISel] Combine mulo x, 2 -> addo x, x

Similar to what SDAG does when it sees a smulo/umulo against 2
(see: `DAGCombiner::visitMULO`)

This pattern is fairly common in Swift code AFAICT.

Here's an example extracted from a Swift testcase:

https://godbolt.org/z/6cT8Mesx7

Differential Revision: https://reviews.llvm.org/D110662

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5e3f3717952da..32d50703c931e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -609,6 +609,11 @@ class CombinerHelper {
   ///       and rename: s/bool tryEmit/void emit/
   bool tryEmitMemcpyInline(MachineInstr &MI);
 
+  /// Match:
+  ///   (G_UMULO x, 2) -> (G_UADDO x, x)
+  ///   (G_SMULO x, 2) -> (G_SADDO x, x)
+  bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
+
 private:
   /// Given a non-indexed load or store instruction \p MI, find an offset that
   /// can be usefully and legally folded into it as a post-indexing operation.

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 09be6db7d46a6..b7bfdb69f0a50 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -702,6 +702,12 @@ def constant_fold : GICombineRule<
    [{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]),
   (apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
 
+def mulo_by_2: GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+         [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -718,7 +724,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         fneg_fneg_fold, right_identity_one]>;
 
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
-                                     overlapping_and]>;
+                                     overlapping_and, mulo_by_2]>;
 
 def known_bits_simplifications : GICombineGroup<[
   redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 17d256c2dca89..0515e44c6c947 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4376,6 +4376,35 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
   return true;
 }
 
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+  unsigned Opc = MI.getOpcode();
+  assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+  // Check for a constant 2 or a splat of 2 on the RHS.
+  auto RHS = MI.getOperand(3).getReg();
+  bool IsVector = MRI.getType(RHS).isVector();
+  if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
+    return false;
+  if (IsVector) {
+    // FIXME: There's no mi_match pattern for this yet.
+    auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+    if (!RHSDef)
+      return false;
+    auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+    if (!Splat || *Splat != 2)
+      return false;
+  }
+
+  MatchInfo = [=, &MI](MachineIRBuilder &B) {
+    Observer.changingInstr(MI);
+    unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+                                                   : TargetOpcode::G_SADDO;
+    MI.setDesc(Builder.getTII().get(NewOpc));
+    MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+    Observer.changedInstr(MI);
+  };
+  return true;
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
new file mode 100644
index 0000000000000..27486b5752d4f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
@@ -0,0 +1,117 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="mulo_by_2" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+...
+---
+name:            smulo_to_saddo
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: smulo_to_saddo
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_SADDO %copy, %copy
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy:_(s64) = COPY $x0
+    %two:_(s64) = G_CONSTANT i64 2
+    %mul:_(s64), %o:_(s1) = G_SMULO %copy, %two
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            umulo_to_uaddo
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: umulo_to_uaddo
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UADDO %copy, %copy
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy:_(s64) = COPY $x0
+    %two:_(s64) = G_CONSTANT i64 2
+    %mul:_(s64), %o:_(s1) = G_UMULO %copy, %two
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            vector
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0, $d1
+
+    ; CHECK-LABEL: name: vector
+    ; CHECK: liveins: $d0, $d1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(<2 x s32>) = COPY $d0
+    ; CHECK-NEXT: %mul:_(<2 x s32>), %o:_(<2 x s1>) = G_SADDO %copy, %copy
+    ; CHECK-NEXT: %overflow_ext:_(<2 x s32>) = G_ANYEXT %o(<2 x s1>)
+    ; CHECK-NEXT: $d0 = COPY %overflow_ext(<2 x s32>)
+    ; CHECK-NEXT: RET_ReallyLR implicit $d0
+    %copy:_(<2 x s32>) = COPY $d0
+    %two:_(s32) = G_CONSTANT i32 2
+    %splat_two:_(<2 x s32>) = G_BUILD_VECTOR %two(s32), %two(s32)
+    %mul:_(<2 x s32>), %o:_(<2 x s1>) = G_SMULO %copy, %splat_two
+    %overflow_ext:_(<2 x s32>) = G_ANYEXT %o(<2 x s1>)
+    $d0 = COPY %overflow_ext(<2 x s32>)
+    RET_ReallyLR implicit $d0
+...
+---
+name:            dont_combine_wrong_cst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; CHECK-LABEL: name: dont_combine_wrong_cst
+    ; CHECK: liveins: $x0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+    ; CHECK-NEXT: %three:_(s64) = G_CONSTANT i64 3
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UMULO %copy, %three
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy:_(s64) = COPY $x0
+    %three:_(s64) = G_CONSTANT i64 3
+    %mul:_(s64), %o:_(s1) = G_UMULO %copy, %three
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            dont_combine_not_cst
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $x0, $x1
+
+    ; CHECK-LABEL: name: dont_combine_not_cst
+    ; CHECK: liveins: $x0, $x1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %copy1:_(s64) = COPY $x0
+    ; CHECK-NEXT: %copy2:_(s64) = COPY $x1
+    ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UMULO %copy1, %copy2
+    ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+    ; CHECK-NEXT: RET_ReallyLR implicit $w0
+    %copy1:_(s64) = COPY $x0
+    %copy2:_(s64) = COPY $x1
+    %mul:_(s64), %o:_(s1) = G_UMULO %copy1, %copy2
+    %overflow_ext:_(s32) = G_ZEXT %o(s1)
+    $w0 = COPY %overflow_ext(s32)
+    RET_ReallyLR implicit $w0
+...


        


More information about the llvm-commits mailing list