[llvm] a2ab765 - [GISel] Eliminate redundant bitmasking
Jon Roelofs via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 17 12:53:13 PDT 2021
Author: Jon Roelofs
Date: 2021-06-17T12:53:00-07:00
New Revision: a2ab765029dd76756e98851847e6675795a973b6
URL: https://github.com/llvm/llvm-project/commit/a2ab765029dd76756e98851847e6675795a973b6
DIFF: https://github.com/llvm/llvm-project/commit/a2ab765029dd76756e98851847e6675795a973b6.diff
LOG: [GISel] Eliminate redundant bitmasking
This was a GISel vs SDAG regression that showed up at -Os on arm64 in:
SingleSource/Benchmarks/Adobe-C++/simple_types_constant_folding.test
https://llvm.godbolt.org/z/aecjodsjG
Differential revision: https://reviews.llvm.org/D103334
Added:
llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 1b4f496ce6ca9..90697306b5281 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -435,6 +435,11 @@ class CombinerHelper {
std::tuple<Register, int64_t> &MatchInfo);
bool applyAshShlToSextInreg(MachineInstr &MI,
std::tuple<Register, int64_t> &MatchInfo);
+
+ /// Fold and(and(x, C1), C2) -> and(x, C1&C2) when C1&C2 is nonzero, otherwise the constant 0. On a match, \p MatchInfo is set to the callback that builds the replacement.
+ bool matchOverlappingAnd(MachineInstr &MI,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
+
/// \return true if \p MI is a G_AND instruction whose operands are x and y
/// where x & y == x or x & y == y. (E.g., one of operands is all-ones value.)
///
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 3846997f0e8a8..3ced5dc793da9 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -402,6 +402,15 @@ def shl_ashr_to_sext_inreg : GICombineRule<
[{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
(apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
>;
+
+// Fold and(and(x, C1), C2) -> and(x, C1&C2) when C1&C2 is nonzero, otherwise the constant 0.
+def overlapping_and: GICombineRule <
+ (defs root:$root, build_fn_matchinfo:$info), // $info is filled in by the matcher below
+ (match (wip_match_opcode G_AND):$root, // the rule is rooted at a G_AND
+ [{ return Helper.matchOverlappingAnd(*${root}, ${info}); }]),
+ (apply [{ return Helper.applyBuildFn(*${root}, ${info}); }]) // hands the callback in $info to applyBuildFn
+>;
+
// Fold (x & y) -> x or (x & y) -> y when (x & y) is known to equal x or equal y.
def redundant_and: GICombineRule <
(defs root:$root, register_matchinfo:$matchinfo),
@@ -643,7 +652,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
i2p_to_p2i, anyext_trunc_fold,
fneg_fneg_fold, right_identity_one]>;
-def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p]>;
+def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
+ overlapping_and]>;
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index d510676fc6102..b7aea819f712d 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2997,6 +2997,33 @@ bool CombinerHelper::applyAshShlToSextInreg(
return true;
}
+/// Match and(and(x, C1), C2); on success set \p MatchInfo to a callback that builds and(x, C1&C2) into the result, or a zero constant when C1&C2 == 0.
+bool CombinerHelper::matchOverlappingAnd(
+ MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_AND); // the root passed in must be a G_AND
+
+ Register Dst = MI.getOperand(0).getReg(); // operand 0 of the root; the pattern match starts from this register
+ LLT Ty = MRI.getType(Dst); // type used for the constants built in the callback
+
+ Register R; // x: the non-constant operand of the inner G_AND
+ int64_t C1; // constant mask matched on the inner G_AND
+ int64_t C2; // constant mask matched on the outer G_AND
+ if (!mi_match(
+ Dst, MRI,
+ m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
+ return false; // not a G_AND of a G_AND with two constant masks
+
+ MatchInfo = [=](MachineIRBuilder &B) { // stored in MatchInfo and run later, hence the by-copy capture
+ if (C1 & C2) { // masks share at least one bit: one AND with the merged mask is enough
+ B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
+ return;
+ }
+ auto Zero = B.buildConstant(Ty, 0); // masks are disjoint, so the result is always 0
+ replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg()); // replace Dst with the zero constant's result register
+ };
+ return true;
+}
+
bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
Register &Replacement) {
// Given
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir
new file mode 100644
index 0000000000000..ab8f3f11e0e60
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/opt-overlapping-and.mir
@@ -0,0 +1,121 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -debugify-and-strip-all-safe -mtriple arm64-apple-ios -O0 -run-pass=aarch64-prelegalizer-combiner --aarch64prelegalizercombinerhelper-only-enable-rule="overlapping_and" -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+# REQUIRES: asserts
+---
+name: bitmask_overlap1
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: bitmask_overlap1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: $w0 = COPY [[AND]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 -128
+ %3:_(s32) = G_CONSTANT i32 255
+ %2:_(s32) = G_AND %0, %1
+ %4:_(s32) = G_AND %2, %3
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap2
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: bitmask_overlap2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: $w0 = COPY [[AND]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 255
+ %3:_(s32) = G_CONSTANT i32 -128
+ %2:_(s32) = G_AND %1, %0
+ %4:_(s32) = G_AND %2, %3
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap3
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: bitmask_overlap3
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: $w0 = COPY [[AND]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 255
+ %3:_(s32) = G_CONSTANT i32 -128
+ %2:_(s32) = G_AND %1, %0
+ %4:_(s32) = G_AND %3, %2
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap4
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: bitmask_overlap4
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: $w0 = COPY [[AND]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 255
+ %3:_(s32) = G_CONSTANT i32 -128
+ %2:_(s32) = G_AND %0, %1
+ %4:_(s32) = G_AND %3, %2
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_no_overlap
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: bitmask_no_overlap
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: $w0 = COPY [[C]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = G_CONSTANT i32 1
+ %3:_(s32) = G_CONSTANT i32 2
+ %2:_(s32) = G_AND %0, %1
+ %4:_(s32) = G_AND %2, %3
+ $w0 = COPY %4(s32)
+ RET_ReallyLR implicit $w0
+
+...
+---
+name: bitmask_overlap_extrause
+body: |
+ bb.1:
+ ; CHECK-LABEL: name: bitmask_overlap_extrause
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(p0) = COPY $x1
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 128
+ ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]]
+ ; CHECK: G_STORE [[AND]](s32), [[COPY1]](p0) :: (store 4)
+ ; CHECK: $w0 = COPY [[AND1]](s32)
+ ; CHECK: RET_ReallyLR implicit $w0
+ %0:_(s32) = COPY $w0
+ %1:_(p0) = COPY $x1
+ %2:_(s32) = G_CONSTANT i32 255
+ %4:_(s32) = G_CONSTANT i32 -128
+ %3:_(s32) = G_AND %0, %2
+ %5:_(s32) = G_AND %3, %4
+ G_STORE %3(s32), %1(p0) :: (store 4)
+ $w0 = COPY %5(s32)
+ RET_ReallyLR implicit $w0
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
index a6c2ea7a0bc4e..890a9b3e36447 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll
@@ -1151,7 +1151,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3
; SI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; SI-NEXT: v_and_b32_e32 v3, s6, v1
+; SI-NEXT: v_and_b32_e32 v3, s6, v3
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, 23, v0
; SI-NEXT: s_mov_b32 s4, 0
@@ -1183,7 +1183,7 @@ define float @v_test_sitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3
; VI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; VI-NEXT: v_and_b32_e32 v3, s6, v1
+; VI-NEXT: v_and_b32_e32 v3, s6, v3
; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0
; VI-NEXT: s_mov_b32 s4, 0
@@ -1218,7 +1218,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
; SI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; SI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3
; SI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; SI-NEXT: v_and_b32_e32 v3, s4, v1
+; SI-NEXT: v_and_b32_e32 v3, s4, v3
; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; SI-NEXT: v_lshlrev_b32_e32 v0, 23, v0
; SI-NEXT: s_mov_b32 s4, 0
@@ -1248,7 +1248,7 @@ define float @v_test_uitofp_i64_byte_to_f32(i64 %arg0) {
; VI-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; VI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v3
; VI-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
-; VI-NEXT: v_and_b32_e32 v3, s4, v1
+; VI-NEXT: v_and_b32_e32 v3, s4, v3
; VI-NEXT: v_lshrrev_b32_e32 v1, 8, v1
; VI-NEXT: v_lshlrev_b32_e32 v0, 23, v0
; VI-NEXT: s_mov_b32 s4, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
index 02370acc055a1..d42c290d00bae 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
@@ -547,21 +547,18 @@ define amdgpu_ps i32 @s_shl_i32_zext_i16(i16 inreg %x) {
;
; GFX8-LABEL: s_shl_i32_zext_i16:
; GFX8: ; %bb.0:
-; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_and_b32 s0, s0, 0x3fff
; GFX8-NEXT: s_lshl_b32 s0, s0, 2
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_shl_i32_zext_i16:
; GFX9: ; %bb.0:
-; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_and_b32 s0, s0, 0x3fff
; GFX9-NEXT: s_lshl_b32 s0, s0, 2
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: s_shl_i32_zext_i16:
; GFX10: ; %bb.0:
-; GFX10-NEXT: s_and_b32 s0, s0, 0xffff
; GFX10-NEXT: s_and_b32 s0, s0, 0x3fff
; GFX10-NEXT: s_lshl_b32 s0, s0, 2
; GFX10-NEXT: ; return to shader part epilog
More information about the llvm-commits
mailing list