[llvm-branch-commits] [llvm] [AMDGPU][GlobalISel] Combine (sext (trunc (sext_in_reg x))) (PR #131312)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Mar 14 04:19:30 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-globalisel
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
This is a bit of an awkward pattern that can come up as a result
of legalization and then widening of i16 operations to i32 in RegBankSelect
on AMDGPU.
This quick combine avoids redundant patterns like
```
s_sext_i32_i8 s0, s0
s_sext_i32_i16 s0, s0
s_ashr_i32 s0, s0, s1
```
With this combine, the second sext (`s_sext_i32_i16`) is removed because it is redundant.
---
Full diff: https://github.com/llvm/llvm-project/pull/131312.diff
3 Files Affected:
- (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+11-1)
- (added) llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir (+86)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll (+16-62)
``````````diff
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 3590ab221ad44..9727b86b4be8b 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -258,6 +258,14 @@ def sext_trunc_sextload : GICombineRule<
[{ return Helper.matchSextTruncSextLoad(*${d}); }]),
(apply [{ Helper.applySextTruncSextLoad(*${d}); }])>;
+def sext_trunc_sextinreg : GICombineRule<
+ (defs root:$dst),
+ (match (G_SEXT_INREG $sir, $src, $width),
+ (G_TRUNC $trunc, $sir),
+ (G_SEXT $dst, $trunc),
+ [{ return (MRI.getType(${trunc}.getReg()).getScalarSizeInBits() >= ${width}.getImm()); }]),
+ (apply (GIReplaceReg $dst, $sir))>;
+
def sext_inreg_of_load_matchdata : GIDefMatchData<"std::tuple<Register, unsigned>">;
def sext_inreg_of_load : GICombineRule<
(defs root:$root, sext_inreg_of_load_matchdata:$matchinfo),
@@ -1896,7 +1904,9 @@ def cast_of_cast_combines: GICombineGroup<[
sext_of_anyext,
anyext_of_anyext,
anyext_of_zext,
- anyext_of_sext
+ anyext_of_sext,
+
+ sext_trunc_sextinreg
]>;
def cast_combines: GICombineGroup<[
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
new file mode 100644
index 0000000000000..d41e5b172efc2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
@@ -0,0 +1,86 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: trunc_s16_inreg_8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s16_inreg_8
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %trunc:_(s16) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s16_inreg_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s16_inreg_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %trunc:_(s16) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s8_inreg_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s8_inreg_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
+ ; CHECK-NEXT: %sext:_(s32) = G_SEXT %trunc(s8)
+ ; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %trunc:_(s8) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
+---
+name: mismatching_types
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: mismatching_types
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
+ ; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
+ ; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
+ ; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %trunc:_(s8) = G_TRUNC %inreg
+ %sext:_(s16) = G_SEXT %trunc
+ %anyext:_(s32) = G_ANYEXT %sext
+ $vgpr0 = COPY %anyext
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 8c687d85ac24b..7ec27f47578c2 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -197,33 +197,13 @@ define amdgpu_cs <4 x i32> @abs_vgpr_v4i32(<4 x i32> %arg) {
}
define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
-; GFX6-LABEL: abs_sgpr_v2i8:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_sext_i32_i8 s0, s0
-; GFX6-NEXT: s_sext_i32_i8 s1, s1
-; GFX6-NEXT: s_abs_i32 s0, s0
-; GFX6-NEXT: s_abs_i32 s1, s1
-; GFX6-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_v2i8:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_sext_i32_i8 s0, s0
-; GFX8-NEXT: s_sext_i32_i8 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_abs_i32 s0, s0
-; GFX8-NEXT: s_abs_i32 s1, s1
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_v2i8:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_sext_i32_i8 s0, s0
-; GFX10-NEXT: s_sext_i32_i8 s1, s1
-; GFX10-NEXT: s_sext_i32_i16 s0, s0
-; GFX10-NEXT: s_sext_i32_i16 s1, s1
-; GFX10-NEXT: s_abs_i32 s0, s0
-; GFX10-NEXT: s_abs_i32 s1, s1
-; GFX10-NEXT: ; return to shader part epilog
+; GFX-LABEL: abs_sgpr_v2i8:
+; GFX: ; %bb.0:
+; GFX-NEXT: s_sext_i32_i8 s0, s0
+; GFX-NEXT: s_sext_i32_i8 s1, s1
+; GFX-NEXT: s_abs_i32 s0, s0
+; GFX-NEXT: s_abs_i32 s1, s1
+; GFX-NEXT: ; return to shader part epilog
%res = call <2 x i8> @llvm.abs.v2i8(<2 x i8> %arg, i1 false)
ret <2 x i8> %res
}
@@ -268,41 +248,15 @@ define amdgpu_cs <2 x i8> @abs_vgpr_v2i8(<2 x i8> %arg) {
}
define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
-; GFX6-LABEL: abs_sgpr_v3i8:
-; GFX6: ; %bb.0:
-; GFX6-NEXT: s_sext_i32_i8 s0, s0
-; GFX6-NEXT: s_sext_i32_i8 s1, s1
-; GFX6-NEXT: s_sext_i32_i8 s2, s2
-; GFX6-NEXT: s_abs_i32 s0, s0
-; GFX6-NEXT: s_abs_i32 s1, s1
-; GFX6-NEXT: s_abs_i32 s2, s2
-; GFX6-NEXT: ; return to shader part epilog
-;
-; GFX8-LABEL: abs_sgpr_v3i8:
-; GFX8: ; %bb.0:
-; GFX8-NEXT: s_sext_i32_i8 s0, s0
-; GFX8-NEXT: s_sext_i32_i8 s1, s1
-; GFX8-NEXT: s_sext_i32_i8 s2, s2
-; GFX8-NEXT: s_sext_i32_i16 s0, s0
-; GFX8-NEXT: s_sext_i32_i16 s1, s1
-; GFX8-NEXT: s_sext_i32_i16 s2, s2
-; GFX8-NEXT: s_abs_i32 s0, s0
-; GFX8-NEXT: s_abs_i32 s1, s1
-; GFX8-NEXT: s_abs_i32 s2, s2
-; GFX8-NEXT: ; return to shader part epilog
-;
-; GFX10-LABEL: abs_sgpr_v3i8:
-; GFX10: ; %bb.0:
-; GFX10-NEXT: s_sext_i32_i8 s0, s0
-; GFX10-NEXT: s_sext_i32_i8 s1, s1
-; GFX10-NEXT: s_sext_i32_i8 s2, s2
-; GFX10-NEXT: s_sext_i32_i16 s0, s0
-; GFX10-NEXT: s_sext_i32_i16 s1, s1
-; GFX10-NEXT: s_sext_i32_i16 s2, s2
-; GFX10-NEXT: s_abs_i32 s0, s0
-; GFX10-NEXT: s_abs_i32 s1, s1
-; GFX10-NEXT: s_abs_i32 s2, s2
-; GFX10-NEXT: ; return to shader part epilog
+; GFX-LABEL: abs_sgpr_v3i8:
+; GFX: ; %bb.0:
+; GFX-NEXT: s_sext_i32_i8 s0, s0
+; GFX-NEXT: s_sext_i32_i8 s1, s1
+; GFX-NEXT: s_sext_i32_i8 s2, s2
+; GFX-NEXT: s_abs_i32 s0, s0
+; GFX-NEXT: s_abs_i32 s1, s1
+; GFX-NEXT: s_abs_i32 s2, s2
+; GFX-NEXT: ; return to shader part epilog
%res = call <3 x i8> @llvm.abs.v3i8(<3 x i8> %arg, i1 false)
ret <3 x i8> %res
}
``````````
</details>
https://github.com/llvm/llvm-project/pull/131312
More information about the llvm-branch-commits
mailing list