[llvm] 15a24e1 - [GlobalISel] Combine mulo x, 2 -> addo x, x
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 28 16:59:59 PDT 2021
Author: Jessica Paquette
Date: 2021-09-28T16:59:43-07:00
New Revision: 15a24e1fdbc446bc3d0fa0bf6c2a1aa44281cc75
URL: https://github.com/llvm/llvm-project/commit/15a24e1fdbc446bc3d0fa0bf6c2a1aa44281cc75
DIFF: https://github.com/llvm/llvm-project/commit/15a24e1fdbc446bc3d0fa0bf6c2a1aa44281cc75.diff
LOG: [GlobalISel] Combine mulo x, 2 -> addo x, x
This is similar to what SDAG does when it sees an smulo/umulo against 2
(see `DAGCombiner::visitMULO`).
This pattern is fairly common in Swift code, AFAICT.
Here's an example extracted from a Swift testcase:
https://godbolt.org/z/6cT8Mesx7
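To make the pattern concrete without following the link, here is a minimal,
hypothetical C++ reduction (not the actual Swift testcase) of an
overflow-checked multiply by 2. The IRTranslator turns it into
G_SMULO x, 2, which this combine rewrites to G_SADDO x, x:

    #include <cstdint>
    #include <cstdlib>

    // Overflow-checked doubling; Swift's arithmetic traps on overflow
    // in a similar way. __builtin_mul_overflow lowers to
    // llvm.smul.with.overflow.i64, i.e. G_SMULO in GlobalISel.
    int64_t double_or_trap(int64_t X) {
      int64_t Res;
      if (__builtin_mul_overflow(X, int64_t{2}, &Res))
        abort();
      return Res;
    }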
Differential Revision: https://reviews.llvm.org/D110662
Added:
llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5e3f3717952da..32d50703c931e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -609,6 +609,11 @@ class CombinerHelper {
/// and rename: s/bool tryEmit/void emit/
bool tryEmitMemcpyInline(MachineInstr &MI);
+ /// Match:
+ /// (G_UMULO x, 2) -> (G_UADDO x, x)
+ /// (G_SMULO x, 2) -> (G_SADDO x, x)
+ bool matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo);
+
private:
/// Given a non-indexed load or store instruction \p MI, find an offset that
/// can be usefully and legally folded into it as a post-indexing operation.
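The new hook follows the combiner's build-function matchinfo idiom: the
match step fills a closure describing the rewrite, and the apply step runs
it. A rough sketch of that contract (BuildFnTy is a pre-existing GlobalISel
alias, roughly std::function<void(MachineIRBuilder &)>; the driver code
below is illustrative, not part of this patch):

    // Illustrative driver: record the rewrite during matching, then
    // execute it. The "NoErase" apply variant is used because the
    // closure mutates the G_*MULO in place instead of replacing it.
    BuildFnTy MatchInfo;
    if (Helper.matchMulOBy2(MI, MatchInfo))
      Helper.applyBuildFnNoErase(MI, MatchInfo);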
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 09be6db7d46a6..b7bfdb69f0a50 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -702,6 +702,12 @@ def constant_fold : GICombineRule<
[{ return Helper.matchConstantFold(*${d}, ${matchinfo}); }]),
(apply [{ Helper.replaceInstWithConstant(*${d}, ${matchinfo}); }])>;
+def mulo_by_2: GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (wip_match_opcode G_UMULO, G_SMULO):$root,
+ [{ return Helper.matchMulOBy2(*${root}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFnNoErase(*${root}, ${matchinfo}); }])>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -718,7 +724,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
fneg_fneg_fold, right_identity_one]>;
def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
- overlapping_and]>;
+ overlapping_and, mulo_by_2]>;
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
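For context, the apply side of the rule reuses the existing
applyBuildFnNoErase helper, which (approximately; it is not part of this
patch) just positions the builder at the matched instruction and invokes
the recorded closure:

    // Approximate shape of the pre-existing helper. Nothing is erased
    // here because matchMulOBy2's closure rewrites MI in place.
    void CombinerHelper::applyBuildFnNoErase(MachineInstr &MI,
                                             BuildFnTy &MatchInfo) {
      Builder.setInstrAndDebugLoc(MI);
      MatchInfo(Builder);
    }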
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 17d256c2dca89..0515e44c6c947 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -4376,6 +4376,35 @@ bool CombinerHelper::matchNarrowBinopFeedingAnd(
return true;
}
+bool CombinerHelper::matchMulOBy2(MachineInstr &MI, BuildFnTy &MatchInfo) {
+ unsigned Opc = MI.getOpcode();
+ assert(Opc == TargetOpcode::G_UMULO || Opc == TargetOpcode::G_SMULO);
+ // Check for a constant 2 or a splat of 2 on the RHS.
+ auto RHS = MI.getOperand(3).getReg();
+ bool IsVector = MRI.getType(RHS).isVector();
+ if (!IsVector && !mi_match(MI.getOperand(3).getReg(), MRI, m_SpecificICst(2)))
+ return false;
+ if (IsVector) {
+ // FIXME: There's no mi_match pattern for this yet.
+ auto *RHSDef = getDefIgnoringCopies(RHS, MRI);
+ if (!RHSDef)
+ return false;
+ auto Splat = getBuildVectorConstantSplat(*RHSDef, MRI);
+ if (!Splat || *Splat != 2)
+ return false;
+ }
+
+ MatchInfo = [=, &MI](MachineIRBuilder &B) {
+ Observer.changingInstr(MI);
+ unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
+ : TargetOpcode::G_SADDO;
+ MI.setDesc(Builder.getTII().get(NewOpc));
+ MI.getOperand(3).setReg(MI.getOperand(2).getReg());
+ Observer.changedInstr(MI);
+ };
+ return true;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
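Mutating the instruction in place is valid because G_UMULO/G_SMULO and
G_UADDO/G_SADDO share the same operand layout: two defs (result, overflow)
followed by two uses (LHS, RHS), so swapping the descriptor and pointing
the RHS at the LHS is sufficient. A hypothetical replacement-style closure
that builds a fresh instruction instead would look like this (sketch only,
mirroring the captures in matchMulOBy2 above; the patch deliberately
prefers setDesc()):

    // Hypothetical alternative for comparison: build a new G_*ADDO and
    // erase MI so Res/Ovf keep a single definition.
    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      Register Res = MI.getOperand(0).getReg();
      Register Ovf = MI.getOperand(1).getReg();
      Register LHS = MI.getOperand(2).getReg();
      unsigned NewOpc = Opc == TargetOpcode::G_UMULO ? TargetOpcode::G_UADDO
                                                     : TargetOpcode::G_SADDO;
      B.buildInstr(NewOpc, {Res, Ovf}, {LHS, LHS});
      MI.eraseFromParent();
    };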
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
new file mode 100644
index 0000000000000..27486b5752d4f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mulo-with-2.mir
@@ -0,0 +1,117 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64 -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="mulo_by_2" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+...
+---
+name: smulo_to_saddo
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: smulo_to_saddo
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+ ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_SADDO %copy, %copy
+ ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %copy:_(s64) = COPY $x0
+ %two:_(s64) = G_CONSTANT i64 2
+ %mul:_(s64), %o:_(s1) = G_SMULO %copy, %two
+ %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %overflow_ext(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: umulo_to_uaddo
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: umulo_to_uaddo
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+ ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UADDO %copy, %copy
+ ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %copy:_(s64) = COPY $x0
+ %two:_(s64) = G_CONSTANT i64 2
+ %mul:_(s64), %o:_(s1) = G_UMULO %copy, %two
+ %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %overflow_ext(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: vector
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $d0, $d1
+
+ ; CHECK-LABEL: name: vector
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<2 x s32>) = COPY $d0
+ ; CHECK-NEXT: %mul:_(<2 x s32>), %o:_(<2 x s1>) = G_SADDO %copy, %copy
+ ; CHECK-NEXT: %overflow_ext:_(<2 x s32>) = G_ANYEXT %o(<2 x s1>)
+ ; CHECK-NEXT: $d0 = COPY %overflow_ext(<2 x s32>)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %copy:_(<2 x s32>) = COPY $d0
+ %two:_(s32) = G_CONSTANT i32 2
+ %splat_two:_(<2 x s32>) = G_BUILD_VECTOR %two(s32), %two(s32)
+ %mul:_(<2 x s32>), %o:_(<2 x s1>) = G_SMULO %copy, %splat_two
+ %overflow_ext:_(<2 x s32>) = G_ANYEXT %o(<2 x s1>)
+ $d0 = COPY %overflow_ext(<2 x s32>)
+ RET_ReallyLR implicit $d0
+...
+---
+name: dont_combine_wrong_cst
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: dont_combine_wrong_cst
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $x0
+ ; CHECK-NEXT: %three:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UMULO %copy, %three
+ ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %copy:_(s64) = COPY $x0
+ %three:_(s64) = G_CONSTANT i64 3
+ %mul:_(s64), %o:_(s1) = G_UMULO %copy, %three
+ %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %overflow_ext(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: dont_combine_not_cst
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: dont_combine_not_cst
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy1:_(s64) = COPY $x0
+ ; CHECK-NEXT: %copy2:_(s64) = COPY $x1
+ ; CHECK-NEXT: %mul:_(s64), %o:_(s1) = G_UMULO %copy1, %copy2
+ ; CHECK-NEXT: %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ ; CHECK-NEXT: $w0 = COPY %overflow_ext(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $w0
+ %copy1:_(s64) = COPY $x0
+ %copy2:_(s64) = COPY $x1
+ %mul:_(s64), %o:_(s1) = G_UMULO %copy1, %copy2
+ %overflow_ext:_(s32) = G_ZEXT %o(s1)
+ $w0 = COPY %overflow_ext(s32)
+ RET_ReallyLR implicit $w0
+...