[llvm] [AMDGPU] Fold into uses of splat REG_SEQUENCEs through COPYs. (PR #145691)
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Sun Jul 20 07:42:37 PDT 2025
https://github.com/kosarev updated https://github.com/llvm/llvm-project/pull/145691
>From 9300f4e015603ccd07f70943633b4a8fc1e3b642 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev at amd.com>
Date: Wed, 25 Jun 2025 12:44:38 +0100
Subject: [PATCH 1/2] [AMDGPU] Fold into uses of splat REG_SEQUENCEs through
COPYs.
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 9 ++++++++-
llvm/test/CodeGen/AMDGPU/packed-fp32.ll | 10 ++--------
2 files changed, 10 insertions(+), 9 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 0ed06c37507af..b4a596bd12f06 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1164,11 +1164,18 @@ void SIFoldOperandsImpl::foldOperand(
// Grab the use operands first
SmallVector<MachineOperand *, 4> UsesToProcess(
llvm::make_pointer_range(MRI->use_nodbg_operands(RegSeqDstReg)));
- for (auto *RSUse : UsesToProcess) {
+ for (unsigned I = 0; I != UsesToProcess.size(); ++I) {
+ MachineOperand *RSUse = UsesToProcess[I];
MachineInstr *RSUseMI = RSUse->getParent();
unsigned OpNo = RSUseMI->getOperandNo(RSUse);
if (SplatRC) {
+ if (RSUseMI->isCopy()) {
+ Register DstReg = RSUseMI->getOperand(0).getReg();
+ append_range(UsesToProcess,
+ make_pointer_range(MRI->use_nodbg_operands(DstReg)));
+ continue;
+ }
if (tryFoldRegSeqSplat(RSUseMI, OpNo, SplatVal, SplatRC)) {
FoldableDef SplatDef(SplatVal, SplatRC);
appendFoldCandidate(FoldList, RSUseMI, OpNo, SplatDef);
diff --git a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
index bef38c1a65ef8..4db3f2189bfc3 100644
--- a/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
+++ b/llvm/test/CodeGen/AMDGPU/packed-fp32.ll
@@ -2155,11 +2155,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX90A-GISEL-LABEL: fadd_fadd_fsub_0:
; GFX90A-GISEL: ; %bb.0: ; %bb
; GFX90A-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX90A-GISEL-NEXT: s_mov_b32 s2, 0
-; GFX90A-GISEL-NEXT: s_mov_b32 s3, s2
-; GFX90A-GISEL-NEXT: v_pk_mov_b32 v[0:1], s[2:3], s[2:3] op_sel:[0,1]
; GFX90A-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
+; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v0, v1
; GFX90A-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
; GFX90A-GISEL-NEXT: v_mov_b32_e32 v2, s0
@@ -2170,11 +2167,8 @@ define amdgpu_kernel void @fadd_fadd_fsub_0(<2 x float> %arg) {
; GFX942-GISEL-LABEL: fadd_fadd_fsub_0:
; GFX942-GISEL: ; %bb.0: ; %bb
; GFX942-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX942-GISEL-NEXT: s_mov_b32 s2, 0
-; GFX942-GISEL-NEXT: s_mov_b32 s3, s2
-; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], v[0:1]
+; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], s[0:1], 0
; GFX942-GISEL-NEXT: s_nop 0
; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, v1
; GFX942-GISEL-NEXT: v_pk_add_f32 v[0:1], v[0:1], 0
>From 5b81404d6a2e8f34ced4708380b826a0aecaa210 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev at amd.com>
Date: Sun, 20 Jul 2025 15:41:18 +0100
Subject: [PATCH 2/2] Add a MIR test.
---
.../CodeGen/AMDGPU/si-fold-reg-sequence.mir | 32 +++++++++++++++++--
1 file changed, 30 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
index 7852f5d0c96f5..bd34477e757e2 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fold-reg-sequence.mir
@@ -1,11 +1,23 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
+# Check that we don't hang on this.
---
name: fold_reg_sequence
body: |
bb.0:
liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-LABEL: name: fold_reg_sequence
+ ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 429
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[S_MOV_B32_1]], %subreg.sub0, [[S_MOV_B32_]], %subreg.sub1
+ ; CHECK-NEXT: [[V_MUL_HI_U32_e64_:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 $vgpr2, [[REG_SEQUENCE]].sub0, implicit $exec
+ ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
+ ; CHECK-NEXT: [[V_MUL_HI_U32_e64_1:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[GLOBAL_LOAD_DWORD]], [[REG_SEQUENCE]].sub0, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sreg_32 = S_MOV_B32 0
%1:sreg_32 = S_MOV_B32 429
%2:sreg_64 = REG_SEQUENCE killed %1, %subreg.sub0, %0, %subreg.sub1
@@ -13,6 +25,22 @@ body: |
%4:vgpr_32 = GLOBAL_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec :: (load (s32), addrspace 1)
%5:vgpr_32 = V_MUL_HI_U32_e64 %4, %2.sub0, implicit $exec
S_ENDPGM 0
-
...
+# Fold through a COPY of REG_SEQUENCE.
+---
+name: fold_through_copy
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: fold_through_copy
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 0
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_PK_ADD_F32_:%[0-9]+]]:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, [[DEF1]], 8, 0, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %15:sreg_32 = S_MOV_B32 0
+ %14:sreg_64 = REG_SEQUENCE %15:sreg_32, %subreg.sub0, %15:sreg_32, %subreg.sub1
+ %21:sreg_64 = IMPLICIT_DEF
+ %13:sreg_64_xexec = IMPLICIT_DEF
+ %32:vreg_64_align2 = COPY %14:sreg_64
+ %16:vreg_64_align2 = nofpexcept V_PK_ADD_F32 8, %13:sreg_64_xexec, 8, %32:vreg_64_align2, 0, 0, 0, 0, 0, implicit $mode, implicit $exec
+...
More information about the llvm-commits
mailing list