[llvm] 3c36de5 - GlobalISel: Try to fold G_SEXT_INREG to G_AND with mask
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 18 16:51:40 PST 2022
Author: Matt Arsenault
Date: 2022-11-18T16:51:34-08:00
New Revision: 3c36de55f5e60dee8f1bc04bd201f6dd762b3423
URL: https://github.com/llvm/llvm-project/commit/3c36de55f5e60dee8f1bc04bd201f6dd762b3423
DIFF: https://github.com/llvm/llvm-project/commit/3c36de55f5e60dee8f1bc04bd201f6dd762b3423.diff
LOG: GlobalISel: Try to fold G_SEXT_INREG to G_AND with mask
Copies the same transform from the DAG. Helps eliminate some codegen
diffs to allow shared checks in a future change.
Not sure if apply supports anything better than C++ fragments for
the result. It's also not really reasonable that every combine has
to set the default insertion point.
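
A minimal standalone sketch (not part of the commit) of the scalar identity the new combine relies on: when known-bits analysis proves that bit (Width - 1) of the source is zero, sign-extending the low Width bits (G_SEXT_INREG) gives the same result as masking with (1 << Width) - 1, which is the G_AND that buildZExtInReg emits. Assumes Width is in [1, 31] and an arithmetic right shift for signed values, as on common hosts.

// sext_inreg_vs_and.cpp -- illustrative only, not LLVM code.
#include <cassert>
#include <cstdint>

// Reproduces G_SEXT_INREG %x, Width on a 32-bit scalar.
static int32_t sextInReg(int32_t X, unsigned Width) {
  unsigned Shift = 32 - Width;
  return static_cast<int32_t>(static_cast<uint32_t>(X) << Shift) >> Shift;
}

// Reproduces the G_AND with mask (1 << Width) - 1 produced by the combine.
static int32_t zextInReg(int32_t X, unsigned Width) {
  return X & static_cast<int32_t>((1u << Width) - 1u);
}

int main() {
  // Mirrors the first MIR test below: AND with -255 (0xFFFFFF01) clears
  // bits 1-7, so bit 7 of the 8-bit field is known zero and the two forms
  // agree for any loaded value.
  const int32_t Loads[] = {0x12345678, -1, 0x7F};
  for (int32_t Load : Loads) {
    int32_t And = Load & -255;
    assert(sextInReg(And, 8) == zextInReg(And, 8));
  }
  return 0;
}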
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5c54f0e8ab058..0f7bd93aef714 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -129,6 +129,10 @@ class CombinerHelper {
return KB;
}
+ MachineIRBuilder &getBuilder() const {
+ return Builder;
+ }
+
const TargetLowering &getTargetLowering() const;
/// \returns true if the combiner is running pre-legalization.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index a737b1cd15063..f65b1ed3bc876 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -169,6 +169,22 @@ def sext_inreg_of_load : GICombineRule<
[{ return Helper.matchSextInRegOfLoad(*${root}, ${matchinfo}); }]),
(apply [{ Helper.applySextInRegOfLoad(*${root}, ${matchinfo}); }])>;
+def sext_inreg_to_zext_inreg : GICombineRule<
+ (defs root:$dst),
+ (match
+ (G_SEXT_INREG $dst, $src, $imm):$root,
+ [{
+ unsigned BitWidth = MRI.getType(${src}.getReg()).getScalarSizeInBits();
+ return Helper.getKnownBits()->maskedValueIsZero(${src}.getReg(),
+ APInt::getOneBitSet(BitWidth, ${imm}.getImm() - 1)); }]),
+ (apply [{
+ Helper.getBuilder().setInstrAndDebugLoc(*${root});
+ Helper.getBuilder().buildZExtInReg(${dst}, ${src}, ${imm}.getImm());
+ ${root}->eraseFromParent();
+ return true;
+ }])
+>;
+
def combine_indexed_load_store : GICombineRule<
(defs root:$root, indexed_load_store_matchdata:$matchinfo),
(match (wip_match_opcode G_LOAD, G_SEXTLOAD, G_ZEXTLOAD, G_STORE):$root,
@@ -1030,7 +1046,8 @@ def const_combines : GICombineGroup<[constant_fp_op, const_ptradd_to_i2p,
def known_bits_simplifications : GICombineGroup<[
redundant_and, redundant_sext_inreg, redundant_or, urem_pow2_to_mask,
- zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits]>;
+ zext_trunc_fold, icmp_to_true_false_known_bits, icmp_to_lhs_known_bits,
+ sext_inreg_to_zext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend,
narrow_binop_feeding_and]>;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
new file mode 100644
index 0000000000000..c6e7853992d96
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/prelegalizer-combiner-sext_inreg-to-and.mir
@@ -0,0 +1,111 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: sext_inreg_i32_8_and_neg255
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: sext_inreg_i32_8_and_neg255
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: %inreg:_(s32) = G_AND %load, [[C]]
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %ptr:_(p1) = COPY $vgpr0_vgpr1
+ %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
+ %mask:_(s32) = G_CONSTANT i32 -255
+ %and:_(s32) = G_AND %load, %mask
+ %inreg:_(s32) = G_SEXT_INREG %and, 8
+ $vgpr0 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_i32_8_and_255
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: sext_inreg_i32_8_and_255
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %load:_(s32) = G_LOAD %ptr(p1) :: (volatile load (s32), addrspace 1)
+ ; CHECK-NEXT: %mask:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: %and:_(s32) = G_AND %load, %mask
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %and, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %ptr:_(p1) = COPY $vgpr0_vgpr1
+ %load:_(s32) = G_LOAD %ptr :: (volatile load (s32), addrspace 1, align 4)
+ %mask:_(s32) = G_CONSTANT i32 255
+ %and:_(s32) = G_AND %load, %mask
+ %inreg:_(s32) = G_SEXT_INREG %and, 8
+ $vgpr0 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_v2i32_8_and_neg255
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: sext_inreg_v2i32_8_and_neg255
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 -255
+ ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
+ ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_AND %and, [[BUILD_VECTOR]]
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
+ %ptr:_(p1) = COPY $vgpr0_vgpr1
+ %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
+ %mask_elt:_(s32) = G_CONSTANT i32 -255
+ %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+ %and:_(<2 x s32>) = G_AND %load, %mask
+ %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+ $vgpr0_vgpr1 = COPY %inreg
+
+...
+
+---
+name: sext_inreg_v2i32_8_and_255
+legalized: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: sext_inreg_v2i32_8_and_255
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %load:_(<2 x s32>) = G_LOAD %ptr(p1) :: (volatile load (<2 x s32>), addrspace 1)
+ ; CHECK-NEXT: %mask_elt:_(s32) = G_CONSTANT i32 255
+ ; CHECK-NEXT: %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt(s32), %mask_elt(s32)
+ ; CHECK-NEXT: %and:_(<2 x s32>) = G_AND %load, %mask
+ ; CHECK-NEXT: %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(<2 x s32>)
+ %ptr:_(p1) = COPY $vgpr0_vgpr1
+ %load:_(<2 x s32>) = G_LOAD %ptr :: (volatile load (<2 x s32>), addrspace 1, align 8)
+ %mask_elt:_(s32) = G_CONSTANT i32 255
+ %mask:_(<2 x s32>) = G_BUILD_VECTOR %mask_elt, %mask_elt
+ %and:_(<2 x s32>) = G_AND %load, %mask
+ %inreg:_(<2 x s32>) = G_SEXT_INREG %and, 8
+ $vgpr0_vgpr1 = COPY %inreg
+
+...