[llvm] 1b20191 - GlobalISel: Combine out redundant sext_inreg

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 28 14:57:38 PDT 2020


Author: Matt Arsenault
Date: 2020-08-28T17:57:31-04:00
New Revision: 1b201914b5a481b17a85dacc3cb675a35a9a7b97

URL: https://github.com/llvm/llvm-project/commit/1b201914b5a481b17a85dacc3cb675a35a9a7b97
DIFF: https://github.com/llvm/llvm-project/commit/1b201914b5a481b17a85dacc3cb675a35a9a7b97.diff

LOG: GlobalISel: Combine out redundant sext_inreg

The scalar tests don't work yet, since computeNumSignBits apparently
doesn't handle sextload yet, and sext folds into the load first.

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 77b55928d586..b8fba9515706 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -342,6 +342,9 @@ class CombinerHelper {
   /// success.
   bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
 
+  /// \return true if \p MI is a G_SEXT_INREG that can be erased.
+  bool matchRedundantSExtInReg(MachineInstr &MI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f0645d8380d8..0b7ac749e605 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -344,6 +344,16 @@ def and_trivial_mask: GICombineRule <
   (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
 >;
 
+// If the input is already sign extended, just drop the extension.
+// sext_inreg x, K ->
+//   if computeNumSignBits(x) >= (x.getScalarSizeInBits() - K + 1)
+def redundant_sext_inreg: GICombineRule <
+  (defs root:$root),
+  (match (wip_match_opcode G_SEXT_INREG):$root,
+         [{ return Helper.matchRedundantSExtInReg(*${root}); }]),
+     (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
 // FIXME: These should use the custom predicate feature once it lands.
 def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
                                      undef_to_negative_one,
@@ -357,7 +367,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         binop_right_to_zero, p2i_to_i2p,
                                         i2p_to_p2i]>;
 
-def known_bits_simplifications : GICombineGroup<[and_trivial_mask]>;
+def known_bits_simplifications : GICombineGroup<[
+  and_trivial_mask, redundant_sext_inreg]>;
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 

diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0486be1dabb6..d37a97516c68 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2151,6 +2151,14 @@ bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
   return KB->maskedValueIsZero(Replacement, ~Mask);
 }
 
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+  // If the input is already sign extended, just drop the extension.
+  Register Src = MI.getOperand(1).getReg();
+  unsigned ExtBits = MI.getOperand(2).getImm();
+  unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+  return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
 bool CombinerHelper::tryCombine(MachineInstr &MI) {
   if (tryCombineCopy(MI))
     return true;

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
new file mode 100644
index 000000000000..cb18fe597552
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
@@ -0,0 +1,189 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: sext_inreg_s32_7_sextload_from_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_s32_7_sextload_from_1
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+    %2:_(s32) = G_SEXT_INREG %1, 7
+    $vgpr0 = COPY %2
+
+...
+
+---
+name: sext_inreg_s32_8_sextload_from_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_s32_8_sextload_from_1
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+    %2:_(s32) = G_SEXT_INREG %1, 8
+    $vgpr0 = COPY %2
+
+...
+
+---
+name: sext_inreg_s32_9_sextload_from_1
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_s32_9_sextload_from_1
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 9
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+    %2:_(s32) = G_SEXT_INREG %1, 9
+    $vgpr0 = COPY %2
+
+...
+
+---
+name: sext_inreg_s32_7_sext_from_s8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_s32_7_sext_from_s8
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+    %2:_(s32) = G_SEXT %1
+    %3:_(s32) = G_SEXT_INREG %2, 7
+    $vgpr0 = COPY %3
+
+...
+
+---
+name: sext_inreg_s32_8_sext_from_s8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_s32_8_sext_from_s8
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+    %2:_(s32) = G_SEXT %1
+    %3:_(s32) = G_SEXT_INREG %2, 8
+    $vgpr0 = COPY %3
+
+...
+
+---
+name: sext_inreg_s32_8_sext_from_s9
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_s32_8_sext_from_s9
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 9
+    ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+    %2:_(s32) = G_SEXT %1
+    %3:_(s32) = G_SEXT_INREG %2, 9
+    $vgpr0 = COPY %3
+
+...
+
+---
+name: sext_inreg_v2s32_7_sext_from_v2s8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_v2s32_7_sext_from_v2s8
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>)
+    ; GCN: [[SEXT_INREG:%[0-9]+]]:_(<2 x s32>) = G_SEXT_INREG [[SEXT]], 7
+    ; GCN: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<2 x s32>)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1)
+    %2:_(<2 x s32>) = G_SEXT %1
+    %3:_(<2 x s32>) = G_SEXT_INREG %2, 7
+    $vgpr0_vgpr1 = COPY %3
+
+...
+
+---
+name: sext_inreg_v2s32_8_sext_from_v2s8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_v2s32_8_sext_from_v2s8
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>)
+    ; GCN: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1)
+    %2:_(<2 x s32>) = G_SEXT %1
+    %3:_(<2 x s32>) = G_SEXT_INREG %2, 8
+    $vgpr0_vgpr1 = COPY %3
+
+...
+
+---
+name: sext_inreg_v2s32_9_sext_from_v2s8
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    ; GCN-LABEL: name: sext_inreg_v2s32_9_sext_from_v2s8
+    ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+    ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+    ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+    ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>)
+    ; GCN: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>)
+    %0:_(p1) = COPY $vgpr0_vgpr1
+    %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1)
+    %2:_(<2 x s32>) = G_SEXT %1
+    %3:_(<2 x s32>) = G_SEXT_INREG %2, 9
+    $vgpr0_vgpr1 = COPY %3
+
+...

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 8c6f7cb717f5..dad8a5ac58e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -1021,8 +1021,7 @@ define i24 @v_saddsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
-; GFX8-NEXT:    s_bfe_i32 s6, 0, 0x180000
-; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], s6, v0
+; GFX8-NEXT:    v_cmp_gt_i32_e64 s[6:7], 0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -1079,8 +1078,7 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
 ; GFX8-NEXT:    s_cmp_lt_i32 s3, s0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x180000
-; GFX8-NEXT:    s_bfe_i32 s4, 0, 0x180000
-; GFX8-NEXT:    s_cmp_lt_i32 s1, s4
+; GFX8-NEXT:    s_cmp_lt_i32 s1, 0
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s3, 23

diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 3cad20cb141b..d2c65aa5a178 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -1021,8 +1021,7 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
 ; GFX8-NEXT:    v_bfe_i32 v0, v0, 0, 24
 ; GFX8-NEXT:    v_cmp_lt_i32_e64 s[4:5], v3, v0
 ; GFX8-NEXT:    v_bfe_i32 v0, v1, 0, 24
-; GFX8-NEXT:    s_bfe_i32 s6, 0, 0x180000
-; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], s6, v0
+; GFX8-NEXT:    v_cmp_lt_i32_e64 s[6:7], 0, v0
 ; GFX8-NEXT:    v_ashrrev_i32_e32 v0, 23, v3
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0xff800000, v0
 ; GFX8-NEXT:    s_xor_b64 vcc, s[6:7], s[4:5]
@@ -1079,8 +1078,7 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
 ; GFX8-NEXT:    s_cmp_lt_i32 s3, s0
 ; GFX8-NEXT:    s_cselect_b32 s0, 1, 0
 ; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x180000
-; GFX8-NEXT:    s_bfe_i32 s4, 0, 0x180000
-; GFX8-NEXT:    s_cmp_gt_i32 s1, s4
+; GFX8-NEXT:    s_cmp_gt_i32 s1, 0
 ; GFX8-NEXT:    s_cselect_b32 s1, 1, 0
 ; GFX8-NEXT:    s_xor_b32 s0, s1, s0
 ; GFX8-NEXT:    s_ashr_i32 s1, s3, 23


        


More information about the llvm-commits mailing list