[llvm] d25b12b - [GlobalISel] Add combine for (x & mask) -> x when (x & mask) == x

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 19 10:21:34 PDT 2020


Author: Jessica Paquette
Date: 2020-08-19T10:20:57-07:00
New Revision: d25b12bdc307264e353f1a3c4ccd65c0b45b2f86

URL: https://github.com/llvm/llvm-project/commit/d25b12bdc307264e353f1a3c4ccd65c0b45b2f86
DIFF: https://github.com/llvm/llvm-project/commit/d25b12bdc307264e353f1a3c4ccd65c0b45b2f86.diff

LOG: [GlobalISel] Add combine for (x & mask) -> x when (x & mask) == x

If we have a mask, and a value x, where (x & mask) == x, we can drop the AND
and just use x.

This is about a 0.4% geomean code size improvement on CTMark at -O3 for AArch64.

In AArch64, this is most useful post-legalization. Patterns like this often
show up when legalizing s1s, which must be extended to larger types.

e.g.

```
%cmp:_(s32) = G_ICMP ...
%and:_(s32) = G_AND %cmp, 1
```

Since G_ICMP only produces a single bit, there's no reason to mask it with the
G_AND.

Differential Revision: https://reviews.llvm.org/D85463

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/Target/AArch64/AArch64Combine.td

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 61af8cd15f11..6e636703548d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -263,6 +263,9 @@ class CombinerHelper {
   /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
   bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
 
+  /// Delete \p MI and replace all of its uses with \p Replacement.
+  bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
+
   /// Return true if \p MOP1 and \p MOP2 are register operands are defined by
   /// equivalent instructions.
   bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
@@ -303,6 +306,13 @@ class CombinerHelper {
                                std::tuple<Register, int64_t> &MatchInfo);
   bool applyAshShlToSextInreg(MachineInstr &MI,
                               std::tuple<Register, int64_t> &MatchInfo);
+  /// \return true if \p MI is a G_AND instruction whose RHS is a mask where
+  /// LHS & mask == LHS. (E.g., an all-ones value.)
+  ///
+  /// \param [in] MI - The G_AND instruction.
+  /// \param [out] Replacement - A register the G_AND should be replaced with
+  /// on success.
+  bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
 
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 2e85c6064a12..baaf875881d5 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -299,6 +299,14 @@ def shl_ashr_to_sext_inreg : GICombineRule<
     [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
   (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
 >;
+// Fold (x & mask) -> x when (x & mask) is known to equal x.
+def and_trivial_mask_matchinfo : GIDefMatchData<"Register">;
+def and_trivial_mask: GICombineRule <
+  (defs root:$root, and_trivial_mask_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_AND):$root,
+         [{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
+>;
 
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 70e201068f5c..03086fa3532d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1766,6 +1766,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
   return true;
 }
 
+bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
+                                                 Register Replacement) {
+  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
+  Register OldReg = MI.getOperand(0).getReg();
+  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
+  MI.eraseFromParent();
+  replaceRegWith(MRI, OldReg, Replacement);
+  return true;
+}
+
 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
   // Match (cond ? x : x)
@@ -1979,6 +1989,52 @@ bool CombinerHelper::applyAshShlToSextInreg(
   return true;
 }
 
+bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
+                                             Register &Replacement) {
+  // Given
+  //
+  // %mask:_(sN) = G_CONSTANT iN 000...0111...1
+  // %x:_(sN) = G_SOMETHING
+  // %y:_(sN) = G_AND %x, %mask
+  //
+  // Eliminate the G_AND when it is known that x & mask == x.
+  //
+  // Patterns like this can appear as a result of legalization. E.g.
+  //
+  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
+  // %one:_(s32) = G_CONSTANT i32 1
+  // %and:_(s32) = G_AND %cmp, %one
+  //
+  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
+  assert(MI.getOpcode() == TargetOpcode::G_AND);
+  if (!KB)
+    return false;
+
+  // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
+  // LHS of the G_AND.
+  Replacement = MI.getOperand(1).getReg();
+  Register AndDst = MI.getOperand(0).getReg();
+  LLT DstTy = MRI.getType(AndDst);
+
+  // FIXME: This should be removed once GISelKnownBits supports vectors.
+  if (DstTy.isVector())
+    return false;
+  if (!canReplaceReg(AndDst, Replacement, MRI))
+    return false;
+
+  // Check that we have a constant on the RHS of the G_AND, which is of the form
+  // 000...0111...1.
+  int64_t Cst;
+  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
+    return false;
+  APInt Mask(DstTy.getSizeInBits(), Cst);
+  if (!Mask.isMask())
+    return false;
+
+  // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
+  return KB->maskedValueIsZero(Replacement, ~Mask);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 8f7afbace999..5fa44606488b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -80,6 +80,7 @@ def AArch64PostLegalizerCombinerHelper
     : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload, shuffle_vector_pseudos,
-                        hoist_logic_op_with_same_opcode_hands]> {
+                        hoist_logic_op_with_same_opcode_hands,
+                        and_trivial_mask]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir
new file mode 100644
index 000000000000..61e789d913ab
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/postlegalizer-combiner-and-trivial-mask.mir
@@ -0,0 +1,222 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+#
+# Check that we can fold (x & mask) -> x when (x & mask) is known to equal x.
+#
+# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+name:            remove_and_with_one_bit
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; G_ICMP produces a single bit. The mask is 1.
+    ;
+    ; cmp = 000...0?
+    ; mask = 000...01
+    ; cmp & mask = 000...0?
+    ;
+    ; Remove the G_AND.
+    ;
+    ; CHECK-LABEL: name: remove_and_with_one_bit
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    ; CHECK: $w0 = COPY %cmp(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    %mask:_(s32) = G_CONSTANT i32 1
+    %and:_(s32) = G_AND %cmp(s32), %mask
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            remove_and_all_ones_mask
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; -1 is all ones. Therefore z & -1 = z. Remove the G_AND.
+    ;
+    ; CHECK-LABEL: name: remove_and_all_ones_mask
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: $w0 = COPY %z(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %mask:_(s32) = G_CONSTANT i32 -1
+    %and:_(s32) = G_AND %z(s32), %mask
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            remove_and_all_ones_zext
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; %z is a s32, so it can be at most the all-ones value on 32 bits.
+    ; In decimal this is 4294967295. Any zero-extension of %z is at most this
+    ; value.
+    ;
+    ; Therefore, zext(z) & 4294967295 == z. Remove the G_AND.
+    ;
+    ; CHECK-LABEL: name: remove_and_all_ones_zext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %ext:_(s64) = G_ZEXT %z
+    %mask:_(s64) = G_CONSTANT i64 4294967295
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            remove_and_all_ones_anyext
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; This is the same as the zext case.
+    ;
+    ; CHECK-LABEL: name: remove_and_all_ones_anyext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %ext:_(s64) = G_ZEXT %z
+    %mask:_(s64) = G_CONSTANT i64 4294967295
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            dont_remove_all_ones_sext
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We don't know if the sign bit is set on %z. So, the value in %ext may have
+    ; higher bits set than 4294967295.
+    ;
+    ; CHECK-LABEL: name: dont_remove_all_ones_sext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %ext:_(s64) = G_SEXT %z(s32)
+    ; CHECK: %mask:_(s64) = G_CONSTANT i64 4294967295
+    ; CHECK: %and:_(s64) = G_AND %ext, %mask
+    ; CHECK: $x0 = COPY %and(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %ext:_(s64) = G_SEXT %z
+    %mask:_(s64) = G_CONSTANT i64 4294967295
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            remove_and_positive_constant_sext
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We know the sign bit is not set on %z. Therefore,
+    ;
+    ; z = ext = 42 = 000...0101010
+    ; mask = 0000...0111111
+    ;
+    ; So z & mask == z
+    ; CHECK-LABEL: name: remove_and_positive_constant_sext
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = G_CONSTANT i32 42
+    ; CHECK: %ext:_(s64) = G_SEXT %z(s32)
+    ; CHECK: $x0 = COPY %ext(s64)
+    ; CHECK: RET_ReallyLR implicit $x0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = G_CONSTANT i32 42
+    %ext:_(s64) = G_SEXT %z
+    %mask:_(s64) = G_CONSTANT i64 63
+    %and:_(s64) = G_AND %ext(s64), %mask
+    $x0 = COPY %and(s64)
+    RET_ReallyLR implicit $x0
+
+...
+---
+name:            not_a_mask
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1
+    ; 6 is not a mask, so we should still have the G_AND.
+    ;
+    ; CHECK-LABEL: name: not_a_mask
+    ; CHECK: liveins: $w0, $w1
+    ; CHECK: %x:_(s32) = COPY $w0
+    ; CHECK: %y:_(s32) = COPY $w1
+    ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    ; CHECK: %mask:_(s32) = G_CONSTANT i32 6
+    ; CHECK: %and:_(s32) = G_AND %cmp, %mask
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
+    %mask:_(s32) = G_CONSTANT i32 6
+    %and:_(s32) = G_AND %cmp(s32), %mask
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+
+...
+---
+name:            unknown_val
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $w0, $w1, $w2
+    ; We don't know what's in $w2, so we can't remove the G_AND without a mask
+    ; that fills every bit in the type.
+    ;
+    ; CHECK-LABEL: name: unknown_val
+    ; CHECK: liveins: $w0, $w1, $w2
+    ; CHECK: %z:_(s32) = COPY $w2
+    ; CHECK: %one:_(s32) = G_CONSTANT i32 32
+    ; CHECK: %and:_(s32) = G_AND %z, %one
+    ; CHECK: $w0 = COPY %and(s32)
+    ; CHECK: RET_ReallyLR implicit $w0
+    %x:_(s32) = COPY $w0
+    %y:_(s32) = COPY $w1
+    %z:_(s32) = COPY $w2
+    %one:_(s32) = G_CONSTANT i32 32
+    %and:_(s32) = G_AND %z(s32), %one
+    $w0 = COPY %and(s32)
+    RET_ReallyLR implicit $w0
+...


        


More information about the llvm-commits mailing list