[llvm] 1b20191 - GlobalISel: Combine out redundant sext_inreg
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 28 14:57:38 PDT 2020
Author: Matt Arsenault
Date: 2020-08-28T17:57:31-04:00
New Revision: 1b201914b5a481b17a85dacc3cb675a35a9a7b97
URL: https://github.com/llvm/llvm-project/commit/1b201914b5a481b17a85dacc3cb675a35a9a7b97
DIFF: https://github.com/llvm/llvm-project/commit/1b201914b5a481b17a85dacc3cb675a35a9a7b97.diff
LOG: GlobalISel: Combine out redundant sext_inreg
The scalar tests don't work yet, since computeNumSignBits apparently
doesn't handle sextload yet, and sext folds into the load first.
Added:
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 77b55928d586..b8fba9515706 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -342,6 +342,9 @@ class CombinerHelper {
/// success.
bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
+ /// \return true if \p MI is a G_SEXT_INREG that can be erased.
+ bool matchRedundantSExtInReg(MachineInstr &MI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index f0645d8380d8..0b7ac749e605 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -344,6 +344,16 @@ def and_trivial_mask: GICombineRule <
(apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
+// If the input is already sign extended, just drop the extension.
+// sext_inreg x, K -> x
+// if computeNumSignBits(x) >= (x.getScalarSizeInBits() - K + 1)
+def redundant_sext_inreg: GICombineRule <
+ (defs root:$root),
+ (match (wip_match_opcode G_SEXT_INREG):$root,
+ [{ return Helper.matchRedundantSExtInReg(*${root}); }]),
+ (apply [{ return Helper.replaceSingleDefInstWithOperand(*${root}, 1); }])
+>;
+
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
undef_to_negative_one,
@@ -357,7 +367,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
binop_right_to_zero, p2i_to_i2p,
i2p_to_p2i]>;
-def known_bits_simplifications : GICombineGroup<[and_trivial_mask]>;
+def known_bits_simplifications : GICombineGroup<[
+ and_trivial_mask, redundant_sext_inreg]>;
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 0486be1dabb6..d37a97516c68 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2151,6 +2151,14 @@ bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
return KB->maskedValueIsZero(Replacement, ~Mask);
}
+bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
+ // Erasable when Src already has at least TypeSize - ExtBits + 1 sign bits.
+ Register Src = MI.getOperand(1).getReg();
+ unsigned ExtBits = MI.getOperand(2).getImm();
+ unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
+ return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
new file mode 100644
index 000000000000..cb18fe597552
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-inreg.mir
@@ -0,0 +1,189 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=amdgpu-prelegalizer-combiner -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
+
+---
+name: sext_inreg_s32_7_sextload_from_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_s32_7_sextload_from_1
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7
+ ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+ %2:_(s32) = G_SEXT_INREG %1, 7
+ $vgpr0 = COPY %2
+
+...
+
+---
+name: sext_inreg_s32_8_sextload_from_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_s32_8_sextload_from_1
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8
+ ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+ %2:_(s32) = G_SEXT_INREG %1, 8
+ $vgpr0 = COPY %2
+
+...
+
+---
+name: sext_inreg_s32_9_sextload_from_1
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_s32_9_sextload_from_1
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 9
+ ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s32) = G_SEXTLOAD %0 :: (load 1, addrspace 1)
+ %2:_(s32) = G_SEXT_INREG %1, 9
+ $vgpr0 = COPY %2
+
+...
+
+---
+name: sext_inreg_s32_7_sext_from_s8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_s32_7_sext_from_s8
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 7
+ ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+ %2:_(s32) = G_SEXT %1
+ %3:_(s32) = G_SEXT_INREG %2, 7
+ $vgpr0 = COPY %3
+
+...
+
+---
+name: sext_inreg_s32_8_sext_from_s8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_s32_8_sext_from_s8
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 8
+ ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+ %2:_(s32) = G_SEXT %1
+ %3:_(s32) = G_SEXT_INREG %2, 8
+ $vgpr0 = COPY %3
+
+...
+
+---
+name: sext_inreg_s32_9_sext_from_s8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_s32_9_sext_from_s8
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[SEXTLOAD:%[0-9]+]]:_(s32) = G_SEXTLOAD [[COPY]](p1) :: (load 1, addrspace 1)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[SEXTLOAD]], 9
+ ; GCN: $vgpr0 = COPY [[SEXT_INREG]](s32)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(s8) = G_LOAD %0 :: (load 1, addrspace 1)
+ %2:_(s32) = G_SEXT %1
+ %3:_(s32) = G_SEXT_INREG %2, 9
+ $vgpr0 = COPY %3
+
+...
+
+---
+name: sext_inreg_v2s32_7_sext_from_v2s8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_v2s32_7_sext_from_v2s8
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+ ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>)
+ ; GCN: [[SEXT_INREG:%[0-9]+]]:_(<2 x s32>) = G_SEXT_INREG [[SEXT]], 7
+ ; GCN: $vgpr0_vgpr1 = COPY [[SEXT_INREG]](<2 x s32>)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1)
+ %2:_(<2 x s32>) = G_SEXT %1
+ %3:_(<2 x s32>) = G_SEXT_INREG %2, 7
+ $vgpr0_vgpr1 = COPY %3
+
+...
+
+---
+name: sext_inreg_v2s32_8_sext_from_v2s8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_v2s32_8_sext_from_v2s8
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+ ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>)
+ ; GCN: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1)
+ %2:_(<2 x s32>) = G_SEXT %1
+ %3:_(<2 x s32>) = G_SEXT_INREG %2, 8
+ $vgpr0_vgpr1 = COPY %3
+
+...
+
+---
+name: sext_inreg_v2s32_9_sext_from_v2s8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ ; GCN-LABEL: name: sext_inreg_v2s32_9_sext_from_v2s8
+ ; GCN: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+ ; GCN: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1
+ ; GCN: [[LOAD:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[COPY]](p1) :: (load 2, addrspace 1)
+ ; GCN: [[SEXT:%[0-9]+]]:_(<2 x s32>) = G_SEXT [[LOAD]](<2 x s8>)
+ ; GCN: $vgpr0_vgpr1 = COPY [[SEXT]](<2 x s32>)
+ %0:_(p1) = COPY $vgpr0_vgpr1
+ %1:_(<2 x s8>) = G_LOAD %0 :: (load 2, addrspace 1)
+ %2:_(<2 x s32>) = G_SEXT %1
+ %3:_(<2 x s32>) = G_SEXT_INREG %2, 9
+ $vgpr0_vgpr1 = COPY %3
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
index 8c6f7cb717f5..dad8a5ac58e8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/saddsat.ll
@@ -1021,8 +1021,7 @@ define i24 @v_saddsat_i24(i24 %lhs, i24 %rhs) {
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 24
-; GFX8-NEXT: s_bfe_i32 s6, 0, 0x180000
-; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], s6, v0
+; GFX8-NEXT: v_cmp_gt_i32_e64 s[6:7], 0, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 23, v3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0xff800000, v0
; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5]
@@ -1079,8 +1078,7 @@ define amdgpu_ps i24 @s_saddsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX8-NEXT: s_cmp_lt_i32 s3, s0
; GFX8-NEXT: s_cselect_b32 s0, 1, 0
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x180000
-; GFX8-NEXT: s_bfe_i32 s4, 0, 0x180000
-; GFX8-NEXT: s_cmp_lt_i32 s1, s4
+; GFX8-NEXT: s_cmp_lt_i32 s1, 0
; GFX8-NEXT: s_cselect_b32 s1, 1, 0
; GFX8-NEXT: s_xor_b32 s0, s1, s0
; GFX8-NEXT: s_ashr_i32 s1, s3, 23
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
index 3cad20cb141b..d2c65aa5a178 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/ssubsat.ll
@@ -1021,8 +1021,7 @@ define i24 @v_ssubsat_i24(i24 %lhs, i24 %rhs) {
; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 24
; GFX8-NEXT: v_cmp_lt_i32_e64 s[4:5], v3, v0
; GFX8-NEXT: v_bfe_i32 v0, v1, 0, 24
-; GFX8-NEXT: s_bfe_i32 s6, 0, 0x180000
-; GFX8-NEXT: v_cmp_lt_i32_e64 s[6:7], s6, v0
+; GFX8-NEXT: v_cmp_lt_i32_e64 s[6:7], 0, v0
; GFX8-NEXT: v_ashrrev_i32_e32 v0, 23, v3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0xff800000, v0
; GFX8-NEXT: s_xor_b64 vcc, s[6:7], s[4:5]
@@ -1079,8 +1078,7 @@ define amdgpu_ps i24 @s_ssubsat_i24(i24 inreg %lhs, i24 inreg %rhs) {
; GFX8-NEXT: s_cmp_lt_i32 s3, s0
; GFX8-NEXT: s_cselect_b32 s0, 1, 0
; GFX8-NEXT: s_bfe_i32 s1, s1, 0x180000
-; GFX8-NEXT: s_bfe_i32 s4, 0, 0x180000
-; GFX8-NEXT: s_cmp_gt_i32 s1, s4
+; GFX8-NEXT: s_cmp_gt_i32 s1, 0
; GFX8-NEXT: s_cselect_b32 s1, 1, 0
; GFX8-NEXT: s_xor_b32 s0, s1, s0
; GFX8-NEXT: s_ashr_i32 s1, s3, 23
More information about the llvm-commits
mailing list