[llvm] fb36ab0 - [GlobalISel] Expand combine for (x & mask) -> x when (x & mask) == x

Mirko Brkusanin via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 10 02:37:14 PST 2020


Author: Mirko Brkusanin
Date: 2020-11-10T11:32:13+01:00
New Revision: fb36ab0a42f4ea8909c19c9e6759be5d617aef55

URL: https://github.com/llvm/llvm-project/commit/fb36ab0a42f4ea8909c19c9e6759be5d617aef55
DIFF: https://github.com/llvm/llvm-project/commit/fb36ab0a42f4ea8909c19c9e6759be5d617aef55.diff

LOG: [GlobalISel] Expand combine for (x & mask) -> x when (x & mask) == x

We can use KnownBitsAnalysis to cover cases where the mask is not trivial. It
can also help with cases where the mask is not a constant but can still be
folded into one. Since 'and' is commutative, we should treat both operands as
possible replacements.

Differential Revision: https://reviews.llvm.org/D90674
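
The condition the combine checks is easiest to see in isolation. The standalone
C++ sketch below is illustrative only and not part of the patch; the
KnownBitsModel struct and the 32-bit example values are assumptions chosen to
mirror the test_shl_and case added further down. It models the rule that
(x & y) can be replaced by x whenever every bit position is either known to be
zero in x or known to be one in y.

    // Minimal model of the known-bits test; no LLVM dependency.
    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    struct KnownBitsModel {   // stand-in for llvm::KnownBits
      uint32_t Zero;          // bits known to be 0
      uint32_t One;           // bits known to be 1
    };

    // (x & y) == x for every value consistent with the known bits iff
    // (x known-zero | y known-one) covers all 32 bit positions.
    bool lhsSurvivesAnd(KnownBitsModel X, KnownBitsModel Y) {
      return (X.Zero | Y.One) == 0xFFFFFFFFu;
    }

    int main() {
      // x = (something << 5): the low five bits are known zero.
      KnownBitsModel X{0x0000001Fu, 0x00000000u};
      // y = 0xFFFFFFE0: a fully known constant mask.
      KnownBitsModel Y{0x0000001Fu, 0xFFFFFFE0u};
      assert(lhsSurvivesAnd(X, Y));                 // the G_AND can be dropped
      std::printf("redundant: %d\n", lhsSurvivesAnd(X, Y));
    }

Because 'and' is commutative, the symmetric test (X.One | Y.Zero) decides
whether the right-hand operand can be kept instead; the patch performs both
checks.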

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/Target/AArch64/AArch64Combine.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index a807f7c0a8bc..4deefc497d6d 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -423,13 +423,13 @@ class CombinerHelper {
                                std::tuple<Register, int64_t> &MatchInfo);
   bool applyAshShlToSextInreg(MachineInstr &MI,
                               std::tuple<Register, int64_t> &MatchInfo);
-  /// \return true if \p MI is a G_AND instruction whose RHS is a mask where
-  /// LHS & mask == LHS. (E.g., an all-ones value.)
+  /// \return true if \p MI is a G_AND instruction whose operands are x and y
+  /// where x & y == x or x & y == y. (E.g., one operand is an all-ones value.)
   ///
   /// \param [in] MI - The G_AND instruction.
   /// \param [out] Replacement - A register the G_AND should be replaced with on
   /// success.
-  bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
+  bool matchRedundantAnd(MachineInstr &MI, Register &Replacement);
 
   /// \return true if \p MI is a G_SEXT_INREG that can be erased.
   bool matchRedundantSExtInReg(MachineInstr &MI);

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a19b0059b51a..fc1718042290 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -381,12 +381,12 @@ def shl_ashr_to_sext_inreg : GICombineRule<
     [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
   (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
 >;
-// Fold (x & mask) -> x when (x & mask) is known to equal x.
-def and_trivial_mask_matchinfo : GIDefMatchData<"Register">;
-def and_trivial_mask: GICombineRule <
-  (defs root:$root, and_trivial_mask_matchinfo:$matchinfo),
+// Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y.
+def redundant_and_matchinfo : GIDefMatchData<"Register">;
+def redundant_and: GICombineRule <
+  (defs root:$root, redundant_and_matchinfo:$matchinfo),
   (match (wip_match_opcode G_AND):$root,
-         [{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]),
+         [{ return Helper.matchRedundantAnd(*${root}, ${matchinfo}); }]),
   (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
 >;
 
@@ -551,7 +551,7 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
 def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p]>;
 
 def known_bits_simplifications : GICombineGroup<[
-  and_trivial_mask, redundant_sext_inreg]>;
+  redundant_and, redundant_sext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 83fbbed2138d..cee43e5ca181 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2833,15 +2833,15 @@ bool CombinerHelper::applyAshShlToSextInreg(
   return true;
 }
 
-bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
-                                             Register &Replacement) {
+bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
+                                       Register &Replacement) {
   // Given
   //
-  // %mask:_(sN) = G_CONSTANT iN 000...0111...1
+  // %y:_(sN) = G_SOMETHING
   // %x:_(sN) = G_SOMETHING
-  // %y:_(sN) = G_AND %x, %mask
+  // %res:_(sN) = G_AND %x, %y
   //
-  // Eliminate the G_AND when it is known that x & mask == x.
+  // Eliminate the G_AND when it is known that x & y == x or x & y == y.
   //
   // Patterns like this can appear as a result of legalization. E.g.
   //
@@ -2854,29 +2854,38 @@ bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
   if (!KB)
     return false;
 
-  // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
-  // LHS of the G_AND.
-  Replacement = MI.getOperand(1).getReg();
   Register AndDst = MI.getOperand(0).getReg();
   LLT DstTy = MRI.getType(AndDst);
 
   // FIXME: This should be removed once GISelKnownBits supports vectors.
   if (DstTy.isVector())
     return false;
-  if (!canReplaceReg(AndDst, Replacement, MRI))
-    return false;
 
-  // Check that we have a constant on the RHS of the G_AND, which is of the form
-  // 000...0111...1.
-  int64_t Cst;
-  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
-    return false;
-  APInt Mask(DstTy.getSizeInBits(), Cst);
-  if (!Mask.isMask())
-    return false;
+  Register LHS = MI.getOperand(1).getReg();
+  Register RHS = MI.getOperand(2).getReg();
+  KnownBits LHSBits = KB->getKnownBits(LHS);
+  KnownBits RHSBits = KB->getKnownBits(RHS);
+
+  // Check that x & Mask == x.
+  // x & 1 == x, always
+  // x & 0 == x, only if x is also 0
+  // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
+  //
+  // Check if we can replace AndDst with the LHS of the G_AND
+  if (canReplaceReg(AndDst, LHS, MRI) &&
+      (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
+    Replacement = LHS;
+    return true;
+  }
 
-  // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
-  return KB->maskedValueIsZero(Replacement, ~Mask);
+  // Check if we can replace AndDst with the RHS of the G_AND
+  if (canReplaceReg(AndDst, RHS, MRI) &&
+      (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
+    Replacement = RHS;
+    return true;
+  }
+
+  return false;
 }
 
 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
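
For readers who want to poke at the condition with LLVM's own types, the
following hedged sketch mirrors the two checks added above. It assumes an LLVM
build is available; the helper names (canKeepLHS/canKeepRHS) and the example
values are illustrative, not part of the patch.

    #include "llvm/ADT/APInt.h"
    #include "llvm/Support/KnownBits.h"
    using namespace llvm;

    // Replace (x & y) with x when every bit is known-zero in x or known-one in y.
    static bool canKeepLHS(const KnownBits &X, const KnownBits &Y) {
      return (X.Zero | Y.One).isAllOnesValue();
    }

    // By commutativity, the symmetric check decides whether y can be kept instead.
    static bool canKeepRHS(const KnownBits &X, const KnownBits &Y) {
      return (X.One | Y.Zero).isAllOnesValue();
    }

    // Mirrors the test_shl_and MIR test: x = (s0 << 5), y = 0xffffffe0.
    static bool exampleShlAnd() {
      KnownBits Shl(32);
      Shl.Zero = APInt(32, 0x1f);        // the shift clears the low five bits
      KnownBits Mask(32);
      Mask.One = APInt(32, 0xffffffe0);  // fully known constant mask
      Mask.Zero = ~Mask.One;
      return canKeepLHS(Shl, Mask);      // true: the G_AND folds to the shift
    }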

diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index 7e7db2f5df0b..bad2c9919b9e 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -127,7 +127,7 @@ def AArch64PostLegalizerCombinerHelper
                        [copy_prop, erase_undef_store, combines_for_extload,
                         sext_trunc_sextload,
                         hoist_logic_op_with_same_opcode_hands,
-                        and_trivial_mask, xor_of_and_with_same_reg,
+                        redundant_and, xor_of_and_with_same_reg,
                         extractvecelt_pairwise_add]> {
   let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 }

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir
new file mode 100644
index 000000000000..0f301238561c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-and.mir
@@ -0,0 +1,144 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            test_const_const
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_const_const
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: $sgpr0 = COPY [[C]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = G_CONSTANT i32 15
+    %1:_(s32) = G_CONSTANT i32 255
+    %2:_(s32) = G_AND %0(s32), %1(s32)
+    $sgpr0 = COPY %2(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+...
+
+---
+name:            test_const_const_2
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_const_const_2
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: $sgpr0 = COPY [[C]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = G_CONSTANT i32 255
+    %1:_(s32) = G_CONSTANT i32 15
+    %2:_(s32) = G_AND %0(s32), %1(s32)
+    $sgpr0 = COPY %2(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+...
+
+---
+name:            test_const_const_3
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; CHECK-LABEL: name: test_const_const_3
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1431655766
+    ; CHECK: $vgpr0 = COPY [[C]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+    %0:_(s32) = G_CONSTANT i32 2863311530
+    %1:_(s32) = G_CONSTANT i32 4008636142
+    %2:_(s32) = G_AND %0(s32), %1(s32)
+    $vgpr0 = COPY %2(s32)
+    SI_RETURN_TO_EPILOG implicit $vgpr0
+...
+
+---
+name:            test_and_and
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_and_and
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+    ; CHECK: $vgpr0 = COPY [[AND]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_CONSTANT i32 15
+    %2:_(s32) = G_CONSTANT i32 255
+    %3:_(s32) = G_AND %0, %1(s32)
+    %4:_(s32) = G_AND %3, %2
+    $vgpr0 = COPY %4(s32)
+    SI_RETURN_TO_EPILOG implicit $vgpr0
+...
+
+---
+name:            test_shl_and
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_shl_and
+    ; CHECK: liveins: $sgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY]], [[C]](s32)
+    ; CHECK: $sgpr0 = COPY [[SHL]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 4294967264
+    %3:_(s32) = G_SHL %0, %1(s32)
+    %4:_(s32) = G_AND %3, %2
+    $sgpr0 = COPY %4(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+...
+
+---
+name:            test_lshr_and
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_lshr_and
+    ; CHECK: liveins: $vgpr0
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: $vgpr0 = COPY [[LSHR]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $vgpr0
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_CONSTANT i32 5
+    %2:_(s32) = G_CONSTANT i32 134217727
+    %3:_(s32) = G_LSHR %0, %1(s32)
+    %4:_(s32) = G_AND %3, %2
+    $vgpr0 = COPY %4(s32)
+    SI_RETURN_TO_EPILOG implicit $vgpr0
+...
+
+---
+name:            test_and_non_const
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+
+    ; CHECK-LABEL: name: test_and_non_const
+    ; CHECK: liveins: $sgpr0, $sgpr1
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+    ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32)
+    ; CHECK: $sgpr0 = COPY [[LSHR]](s32)
+    ; CHECK: SI_RETURN_TO_EPILOG implicit $sgpr0
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = COPY $sgpr1
+    %2:_(s32) = G_CONSTANT i32 16
+    %3:_(s32) = G_CONSTANT i32 65535
+    %4:_(s32) = G_OR %1, %3
+    %5:_(s32) = G_LSHR %0, %2(s32)
+    %6:_(s32) = G_AND %5, %4
+    $sgpr0 = COPY %6(s32)
+    SI_RETURN_TO_EPILOG implicit $sgpr0
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll
index 0c7672123f0b..c8a1e08cb853 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-shift-of-shifted-logic.ll
@@ -5,7 +5,6 @@ define amdgpu_cs i32 @test_shl_and_1(i32 inreg %arg1) {
 ; CHECK-LABEL: test_shl_and_1:
 ; CHECK:       ; %bb.0: ; %.entry
 ; CHECK-NEXT:    s_lshl_b32 s0, s0, 4
-; CHECK-NEXT:    s_and_b32 s0, s0, -16
 ; CHECK-NEXT:    ; return to shader part epilog
 .entry:
   %z1 = shl i32 %arg1, 2
@@ -18,7 +17,6 @@ define amdgpu_cs i32 @test_shl_and_2(i32 inreg %arg1) {
 ; CHECK-LABEL: test_shl_and_2:
 ; CHECK:       ; %bb.0: ; %.entry
 ; CHECK-NEXT:    s_lshl_b32 s0, s0, 8
-; CHECK-NEXT:    s_and_b32 s0, s0, 0xffffff00
 ; CHECK-NEXT:    ; return to shader part epilog
 .entry:
   %z1 = shl i32 %arg1, 5
@@ -44,7 +42,6 @@ define amdgpu_cs i32 @test_lshr_and_1(i32 inreg %arg1) {
 ; CHECK-LABEL: test_lshr_and_1:
 ; CHECK:       ; %bb.0: ; %.entry
 ; CHECK-NEXT:    s_lshr_b32 s0, s0, 4
-; CHECK-NEXT:    s_and_b32 s0, s0, 0xfffffff
 ; CHECK-NEXT:    ; return to shader part epilog
 .entry:
   %z1 = lshr i32 %arg1, 2
@@ -70,7 +67,6 @@ define amdgpu_cs i32 @test_lshr_and_3(i32 inreg %arg1) {
 ; CHECK-LABEL: test_lshr_and_3:
 ; CHECK:       ; %bb.0: ; %.entry
 ; CHECK-NEXT:    s_lshr_b32 s0, s0, 5
-; CHECK-NEXT:    s_and_b32 s0, s0, 0x7ffffff
 ; CHECK-NEXT:    ; return to shader part epilog
 .entry:
   %z1 = lshr i32 %arg1, 3


        


More information about the llvm-commits mailing list