[llvm-branch-commits] [llvm] [GlobalISel] Combine redundant sext_inreg (PR #131624)
Pierre van Houtryve via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Mar 26 02:18:53 PDT 2025
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/131624
>From f4c801437460aef9b9c2e5f49d1e98ec90fadb16 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Mon, 17 Mar 2025 13:54:59 +0100
Subject: [PATCH 1/4] [GlobalISel] Combine redundant sext_inreg
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 +
.../include/llvm/Target/GlobalISel/Combine.td | 9 +-
.../GlobalISel/CombinerHelperCasts.cpp | 27 +++
.../combine-redundant-sext-inreg.mir | 164 ++++++++++++++++++
.../combine-sext-trunc-sextinreg.mir | 87 ++++++++++
.../CodeGen/AMDGPU/GlobalISel/llvm.abs.ll | 5 -
6 files changed, 289 insertions(+), 6 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9b78342c8fc39..5778377d125a8 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -994,6 +994,9 @@ class CombinerHelper {
// overflow sub
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
+ // (sext_inreg (sext_inreg x, K0), K1)
+ void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const;
+
private:
/// Checks for legality of an indexed variant of \p LdSt.
bool isIndexedLoadStoreLegal(GLoadStore &LdSt) const;
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 660b03080f92e..6a0ff683a4647 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1849,6 +1849,12 @@ def anyext_of_anyext : ext_of_ext_opcodes<G_ANYEXT, G_ANYEXT>;
def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
+def sext_inreg_of_sext_inreg : GICombineRule<
+ (defs root:$dst),
+ (match (G_SEXT_INREG $x, $src, $a):$other,
+ (G_SEXT_INREG $dst, $x, $b):$root),
+ (apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>;
+
// Push cast through build vector.
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
(defs root:$root, build_fn_matchinfo:$matchinfo),
@@ -1896,7 +1902,8 @@ def cast_of_cast_combines: GICombineGroup<[
sext_of_anyext,
anyext_of_anyext,
anyext_of_zext,
- anyext_of_sext
+ anyext_of_sext,
+ sext_inreg_of_sext_inreg,
]>;
def cast_combines: GICombineGroup<[
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 576fd5fd81703..883a62c308232 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -378,3 +378,30 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
return false;
}
}
+
+void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
+ MachineInstr &Other) const {
+ assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
+ Other.getOpcode() == TargetOpcode::G_SEXT_INREG);
+
+ unsigned RootWidth = Root.getOperand(2).getImm();
+ unsigned OtherWidth = Other.getOperand(2).getImm();
+
+ Register Dst = Root.getOperand(0).getReg();
+ Register OtherDst = Other.getOperand(0).getReg();
+ Register Src = Other.getOperand(1).getReg();
+
+ if (RootWidth >= OtherWidth) {
+ // The root sext_inreg is entirely redundant because the other one
+ // is narrower.
+ Observer.changingAllUsesOfReg(MRI, Dst);
+ MRI.replaceRegWith(Dst, OtherDst);
+ Observer.finishedChangingAllUsesOfReg();
+ } else {
+ // RootWidth < OtherWidth, rewrite this G_SEXT_INREG with the source of the
+ // other G_SEXT_INREG.
+ Builder.buildSExtInReg(Dst, Src, RootWidth);
+ }
+
+ Root.eraseFromParent();
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
new file mode 100644
index 0000000000000..566ee8e6c338d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
@@ -0,0 +1,164 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: inreg8_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: inreg8_inreg16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+ $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: inreg16_inreg16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %inreg1:_(s32) = G_SEXT_INREG %inreg, 16
+ $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: inreg16_inreg8
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg1:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg1(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %inreg1:_(s32) = G_SEXT_INREG %inreg, 8
+ $vgpr0 = COPY %inreg1
+...
+
+---
+name: inreg16_inreg32_64bit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: inreg16_inreg32_64bit
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
+ %copy:_(s64) = COPY $vgpr0_vgpr1
+ %inreg:_(s64) = G_SEXT_INREG %copy, 16
+ %inreg1:_(s64) = G_SEXT_INREG %inreg, 32
+ $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: inreg32_inreg32_64bit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: inreg32_inreg32_64bit
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %inreg:_(s64) = G_SEXT_INREG %copy, 32
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg(s64)
+ %copy:_(s64) = COPY $vgpr0_vgpr1
+ %inreg:_(s64) = G_SEXT_INREG %copy, 32
+ %inreg1:_(s64) = G_SEXT_INREG %inreg, 32
+ $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: inreg32_inreg16_64bit
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+ ; CHECK-LABEL: name: inreg32_inreg16_64bit
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s64) = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: %inreg1:_(s64) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0_vgpr1 = COPY %inreg1(s64)
+ %copy:_(s64) = COPY $vgpr0_vgpr1
+ %inreg:_(s64) = G_SEXT_INREG %copy, 32
+ %inreg1:_(s64) = G_SEXT_INREG %inreg, 16
+ $vgpr0_vgpr1 = COPY %inreg1
+...
+
+---
+name: vector_inreg8_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: vector_inreg8_inreg16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
+ %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+ %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
+
+---
+name: vector_inreg16_inreg16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: vector_inreg16_inreg16
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg(<4 x s32>)
+ %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+ %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 16
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
+
+---
+name: vector_inreg16_inreg8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-LABEL: name: vector_inreg16_inreg8
+ ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ ; CHECK-NEXT: %inreg1:_(<4 x s32>) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1(<4 x s32>)
+ %copy:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
+ %inreg:_(<4 x s32>) = G_SEXT_INREG %copy, 16
+ %inreg1:_(<4 x s32>) = G_SEXT_INREG %inreg, 8
+ $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %inreg1
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
new file mode 100644
index 0000000000000..c60c137b17f84
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
@@ -0,0 +1,87 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
+# CGP widening of uniform i16 ops is disabled.
+# Two separate combines make it happen (sext_trunc and sext_inreg_of_sext_inreg).
+
+---
+name: trunc_s16_inreg_8
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s16_inreg_8
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %trunc:_(s16) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s16_inreg_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s16_inreg_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ ; CHECK-NEXT: $vgpr0 = COPY %inreg(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %trunc:_(s16) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+---
+name: trunc_s8_inreg_16
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: trunc_s8_inreg_16
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %sext:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: $vgpr0 = COPY %sext(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 16
+ %trunc:_(s8) = G_TRUNC %inreg
+ %sext:_(s32) = G_SEXT %trunc
+ $vgpr0 = COPY %sext
+...
+
+# TODO?: We could handle this by inserting a trunc, but I'm not sure how useful that'd be.
+---
+name: mismatching_types
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0
+ ; CHECK-LABEL: name: mismatching_types
+ ; CHECK: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %copy:_(s32) = COPY $vgpr0
+ ; CHECK-NEXT: %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ ; CHECK-NEXT: %trunc:_(s8) = G_TRUNC %inreg(s32)
+ ; CHECK-NEXT: %sext:_(s16) = G_SEXT %trunc(s8)
+ ; CHECK-NEXT: %anyext:_(s32) = G_ANYEXT %sext(s16)
+ ; CHECK-NEXT: $vgpr0 = COPY %anyext(s32)
+ %copy:_(s32) = COPY $vgpr0
+ %inreg:_(s32) = G_SEXT_INREG %copy, 8
+ %trunc:_(s8) = G_TRUNC %inreg
+ %sext:_(s16) = G_SEXT %trunc
+ %anyext:_(s32) = G_ANYEXT %sext
+ $vgpr0 = COPY %anyext
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
index 41e915a4c1011..18a222e56fd0c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.abs.ll
@@ -223,8 +223,6 @@ define amdgpu_cs <2 x i8> @abs_sgpr_v2i8(<2 x i8> inreg %arg) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
-; GFX10-NEXT: s_sext_i32_i16 s0, s0
-; GFX10-NEXT: s_sext_i32_i16 s1, s1
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: ; return to shader part epilog
@@ -308,9 +306,6 @@ define amdgpu_cs <3 x i8> @abs_sgpr_v3i8(<3 x i8> inreg %arg) {
; GFX10-NEXT: s_sext_i32_i8 s0, s0
; GFX10-NEXT: s_sext_i32_i8 s1, s1
; GFX10-NEXT: s_sext_i32_i8 s2, s2
-; GFX10-NEXT: s_sext_i32_i16 s0, s0
-; GFX10-NEXT: s_sext_i32_i16 s1, s1
-; GFX10-NEXT: s_sext_i32_i16 s2, s2
; GFX10-NEXT: s_abs_i32 s0, s0
; GFX10-NEXT: s_abs_i32 s1, s1
; GFX10-NEXT: s_abs_i32 s2, s2
>From 2996c6e849c4ec8681bd7dad21a541c040184159 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 18 Mar 2025 08:53:28 +0100
Subject: [PATCH 2/4] check canReplaceReg
---
.../llvm/CodeGen/GlobalISel/CombinerHelper.h | 3 ++-
.../include/llvm/Target/GlobalISel/Combine.td | 7 +++---
.../GlobalISel/CombinerHelperCasts.cpp | 22 ++++++++++++-------
3 files changed, 20 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 5778377d125a8..24da7e62a043b 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -995,7 +995,8 @@ class CombinerHelper {
bool matchSuboCarryOut(const MachineInstr &MI, BuildFnTy &MatchInfo) const;
// (sext_inreg (sext_inreg x, K0), K1)
- void applyRedundantSextInReg(MachineInstr &Root, MachineInstr &Other) const;
+ bool matchRedundantSextInReg(MachineInstr &Root, MachineInstr &Other,
+ BuildFnTy &MatchInfo) const;
private:
/// Checks for legality of an indexed variant of \p LdSt.
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 6a0ff683a4647..d525edaca5527 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1850,10 +1850,11 @@ def anyext_of_zext : ext_of_ext_opcodes<G_ANYEXT, G_ZEXT>;
def anyext_of_sext : ext_of_ext_opcodes<G_ANYEXT, G_SEXT>;
def sext_inreg_of_sext_inreg : GICombineRule<
- (defs root:$dst),
+ (defs root:$dst, build_fn_matchinfo:$matchinfo),
(match (G_SEXT_INREG $x, $src, $a):$other,
- (G_SEXT_INREG $dst, $x, $b):$root),
- (apply [{ Helper.applyRedundantSextInReg(*${root}, *${other}); }])>;
+ (G_SEXT_INREG $dst, $x, $b):$root,
+ [{ return Helper.matchRedundantSextInReg(*${root}, *${other}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
// Push cast through build vector.
class buildvector_of_opcode<Instruction castOpcode> : GICombineRule <
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index 883a62c308232..d08a19f6103fa 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -379,8 +379,9 @@ bool CombinerHelper::matchCastOfInteger(const MachineInstr &CastMI,
}
}
-void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
- MachineInstr &Other) const {
+bool CombinerHelper::matchRedundantSextInReg(MachineInstr &Root,
+ MachineInstr &Other,
+ BuildFnTy &MatchInfo) const {
assert(Root.getOpcode() == TargetOpcode::G_SEXT_INREG &&
Other.getOpcode() == TargetOpcode::G_SEXT_INREG);
@@ -394,14 +395,19 @@ void CombinerHelper::applyRedundantSextInReg(MachineInstr &Root,
if (RootWidth >= OtherWidth) {
// The root sext_inreg is entirely redundant because the other one
// is narrower.
- Observer.changingAllUsesOfReg(MRI, Dst);
- MRI.replaceRegWith(Dst, OtherDst);
- Observer.finishedChangingAllUsesOfReg();
+ if (!canReplaceReg(Dst, OtherDst, MRI))
+ return false;
+
+ MatchInfo = [=](MachineIRBuilder &B) {
+ Observer.changingAllUsesOfReg(MRI, Dst);
+ MRI.replaceRegWith(Dst, OtherDst);
+ Observer.finishedChangingAllUsesOfReg();
+ };
} else {
// RootWidth < OtherWidth, rewrite this G_SEXT_INREG with the source of the
// other G_SEXT_INREG.
- Builder.buildSExtInReg(Dst, Src, RootWidth);
+ MatchInfo = [=](MachineIRBuilder &B) {
+ B.buildSExtInReg(Dst, Src, RootWidth);
+ };
}
-
- Root.eraseFromParent();
}
>From 59a0610aa57440ae2dd74c6f22fec2217013d659 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 18 Mar 2025 08:55:59 +0100
Subject: [PATCH 3/4] add missing return
---
llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
index d08a19f6103fa..fc7e3ae5b7942 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelperCasts.cpp
@@ -410,4 +410,6 @@ bool CombinerHelper::matchRedundantSextInReg(MachineInstr &Root,
B.buildSExtInReg(Dst, Src, RootWidth);
};
}
+
+ return true;
}
>From 5a8230e357410ff9789fb1a6c3414ede820eab4c Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 26 Mar 2025 10:18:34 +0100
Subject: [PATCH 4/4] drop -verify-machineinstrs
---
.../CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir | 2 +-
.../CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
index 566ee8e6c338d..746458e03cc85 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-redundant-sext-inreg.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s
---
name: inreg8_inreg16
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
index c60c137b17f84..95d72739e2f25 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-sext-trunc-sextinreg.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 -run-pass=amdgpu-regbank-combiner %s -o - | FileCheck %s
# Check (sext (trunc (sext_inreg x))) can be folded, as it's a pattern that can arise when
# CGP widening of uniform i16 ops is disabled.
More information about the llvm-branch-commits mailing list