[llvm] [AMDGPU] Fix register class constraints for si-fold-operands pass when folding immediate into copies (PR #131387)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 11:07:22 PDT 2025
https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/131387
>From d2316eedc1508eabccbedb6df35f0da4d1cc9a38 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Fri, 14 Mar 2025 16:14:19 -0400
Subject: [PATCH] [AMDGPU] Fix register class constraints for si-fold-operands
pass
Fixes #130020
This fixes an issue where the si-fold-operands pass would incorrectly
fold immediate values into COPY instructions whose destinations are
av_32 registers. That fold is illegal because the resulting MOV's
result register class does not cover av_32.
The pass now checks that the MOV's result register class covers the
COPY's destination register class before attempting the fold.
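For reference, the condition the new check enforces can be sketched as a
standalone predicate. This is only an illustrative sketch, not code from the
patch; the helper name isImmFoldLegalForCopy is hypothetical, and it uses the
generic MCInstrInfo/TargetRegisterInfo APIs rather than the SIFoldOperands
internals:

// Illustrative sketch only (not part of the patch).
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
using namespace llvm;

// Hypothetical helper: returns true if rewriting a COPY that defines a
// register of class DestRC into the move opcode MovOp would satisfy the
// move's result register class constraint.
static bool isImmFoldLegalForCopy(const MCInstrInfo &MII,
                                  const TargetRegisterInfo &TRI,
                                  unsigned MovOp,
                                  const TargetRegisterClass *DestRC) {
  const MCInstrDesc &MovDesc = MII.get(MovOp);
  if (MovDesc.getNumDefs() == 0 || MovDesc.operands()[0].RegClass == -1)
    return true; // No result register class constraint to violate.
  const TargetRegisterClass *ResRC =
      TRI.getRegClass(MovDesc.operands()[0].RegClass);
  // Legal only if every register in DestRC is a valid result for MovOp,
  // i.e. ResRC is a superclass of (or equal to) DestRC. For the case in
  // issue #130020, DestRC is AV_32 while the result class of V_MOV_B32_e32
  // is VGPR_32, so the fold is rejected.
  return DestRC->hasSuperClassEq(ResRC);
}

The patch performs the same test inline in SIFoldOperandsImpl::foldOperand()
using TII and TRI, as shown in the diff below.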
---
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp | 10 ++
llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir | 197 +++++++++------------
2 files changed, 89 insertions(+), 118 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 91df516b80857..deb57eac69be2 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1047,6 +1047,16 @@ void SIFoldOperandsImpl::foldOperand(
if (MovOp == AMDGPU::COPY)
return;
+ // Only fold if the mov's result class (ResRC) is a superclass of, or equal
+ // to, the copy's destination class (DestRC); otherwise the fold is illegal.
+ const MCInstrDesc &MovDesc = TII->get(MovOp);
+ if (MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1) {
+   const TargetRegisterClass *ResRC =
+       TRI->getRegClass(MovDesc.operands()[0].RegClass);
+   if (!DestRC->hasSuperClassEq(ResRC))
+     return;
+ }
+
MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
while (ImpOpI != ImpOpE) {
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index dce11209d0514..8a260c6dfa45e 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -1,7 +1,5 @@
-# RUN: llc -mtriple=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-
-# GCN-LABEL: name: fold-imm-copy
-# GCN: V_AND_B32_e32 65535
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
---
name: fold-imm-copy
@@ -9,6 +7,17 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-LABEL: name: fold-imm-copy
+ ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+ ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY1]], 9, 0
+ ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 2, [[COPY]], implicit $exec
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_LSHLREV_B32_e64_]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
+ ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[S_LOAD_DWORDX4_IMM]], 0, 4, 0, 0, implicit $exec
+ ; CHECK-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[BUFFER_LOAD_DWORD_ADDR64_]], implicit $exec
%0:vgpr_32 = COPY $vgpr0
%1:sgpr_64 = COPY $sgpr0_sgpr1
%2:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 9, 0
@@ -20,17 +29,22 @@ body: |
%8:sreg_32_xm0 = S_MOV_B32 65535
%9:vgpr_32 = COPY %8
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
+
...
---
-# GCN-LABEL: name: no_extra_fold_on_same_opnd
-# The first XOR needs commuting to fold that immediate operand.
-# GCN: V_XOR_B32_e32 0, %1
-# GCN: V_XOR_B32_e32 %2, %4.sub0
name: no_extra_fold_on_same_opnd
tracksRegLiveness: true
body: |
bb.0:
+ ; CHECK-LABEL: name: no_extra_fold_on_same_opnd
+ ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[DEF]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
+ ; CHECK-NEXT: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 0, [[DEF1]], implicit $exec
+ ; CHECK-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[DEF2]], [[REG_SEQUENCE]].sub0, implicit $exec
%0:vgpr_32 = IMPLICIT_DEF
%1:vgpr_32 = IMPLICIT_DEF
%2:vgpr_32 = IMPLICIT_DEF
@@ -38,20 +52,22 @@ body: |
%4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
%5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
%6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
+
...
---
# Make sure the subreg index is not reinterpreted when folding
# immediates
-#
-# GCN-LABEL: name: clear_subreg_imm_fold{{$}}
-# GCN: %1:sgpr_32 = S_MOV_B32 4294967288
-# GCN: %2:sgpr_32 = S_MOV_B32 4294967295
+
name: clear_subreg_imm_fold
tracksRegLiveness: true
body: |
bb.0:
+ ; CHECK-LABEL: name: clear_subreg_imm_fold
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967288
+ ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967295
+ ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
%0:sreg_64 = S_MOV_B64 -8
%1:sgpr_32 = COPY %0.sub0
%2:sgpr_32 = COPY %0.sub1
@@ -59,28 +75,29 @@ body: |
...
-# GCN-LABEL: name: no_fold_imm_into_m0{{$}}
-# GCN: %0:sreg_32 = S_MOV_B32 -8
-# GCN-NEXT: $m0 = COPY %0
-
---
name: no_fold_imm_into_m0
tracksRegLiveness: true
body: |
bb.0:
+ ; CHECK-LABEL: name: no_fold_imm_into_m0
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
+ ; CHECK-NEXT: $m0 = COPY [[S_MOV_B32_]]
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $m0
%0:sreg_32 = S_MOV_B32 -8
$m0 = COPY %0
S_ENDPGM 0, implicit $m0
...
-# GCN-LABEL: name: fold_sgpr_imm_to_vgpr_copy{{$}}
-# GCN: $vgpr0 = V_MOV_B32_e32 -8, implicit $exec
---
name: fold_sgpr_imm_to_vgpr_copy
tracksRegLiveness: true
body: |
bb.0:
+ ; CHECK-LABEL: name: fold_sgpr_imm_to_vgpr_copy
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 -8, implicit $exec
+ ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
%0:sreg_32 = S_MOV_B32 -8
$vgpr0 = COPY %0
S_ENDPGM 0, implicit $vgpr0
@@ -90,16 +107,17 @@ body: |
# The users of $vgpr1 should not be visited for further immediate
# folding.
-# GCN-LABEL: name: no_fold_physreg_users_vgpr{{$}}
-# GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
-# GCN-NEXT: S_NOP 0, implicit-def $vgpr1
-# GCN-NEXT: %2:vgpr_32 = COPY $vgpr1
-# GCN-NEXT: $vgpr2 = COPY %2
---
name: no_fold_physreg_users_vgpr
tracksRegLiveness: true
body: |
bb.0:
+ ; CHECK-LABEL: name: no_fold_physreg_users_vgpr
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr1
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: $vgpr2 = COPY [[COPY]]
+ ; CHECK-NEXT: S_ENDPGM 0
%0:sreg_32 = S_MOV_B32 0
%1:vgpr_32 = COPY %0
$vgpr1 = COPY %0
@@ -111,126 +129,69 @@ body: |
...
---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
+name: s_mov_b32_inlineimm_copy_s_to_av_32
tracksRegLiveness: true
body: |
bb.0:
- ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
- %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- $agpr0 = COPY %0
- S_ENDPGM 0, implicit $agpr0
-
-...
-
----
-name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
-tracksRegLiveness: true
-body: |
- bb.0:
- ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
- ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- ; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
- ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
- %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- $vgpr0 = COPY %0
- S_ENDPGM 0, implicit $vgpr0
+ ; CHECK-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+ ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sreg_32 = S_MOV_B32 32
+ %1:av_32 = COPY %0
+ $agpr0 = COPY %1
+ S_ENDPGM 0
...
---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
+name: v_mov_b32_inlineimm_copy_v_to_av_32
tracksRegLiveness: true
body: |
- bb.0:
- ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
- ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
- %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- %1:agpr_32 = COPY %0
- S_ENDPGM 0, implicit %1
+ bb.0:
+ ; CHECK-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32
+ ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+ %1:av_32 = COPY %0
+ $agpr0 = COPY %1
+ S_ENDPGM 0
...
---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
+name: s_mov_b32_imm_literal_copy_s_to_av_32
tracksRegLiveness: true
body: |
bb.0:
- ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
- %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
- %1:vgpr_32 = COPY %0
- S_ENDPGM 0, implicit %1
+ ; CHECK-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
+ ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+ ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: S_ENDPGM 0
+ %0:sreg_32 = S_MOV_B32 999
+ %1:av_32 = COPY %0
+ $agpr0 = COPY %1
+ S_ENDPGM 0
...
---
-name: v_mov_b32_imm_literal_copy_v_to_agpr_32
+name: v_mov_b32_imm_literal_copy_v_to_av_32
tracksRegLiveness: true
body: |
bb.0:
- ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
- ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
- ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
- ; GCN-NEXT: $agpr0 = COPY [[COPY]]
- ; GCN-NEXT: S_ENDPGM 0
+ ; CHECK-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
+ ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+ ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+ ; CHECK-NEXT: S_ENDPGM 0
%0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
- %1:agpr_32 = COPY %0
+ %1:av_32 = COPY %0
$agpr0 = COPY %1
S_ENDPGM 0
...
-
-# FIXME: Register class restrictions of av register not respected,
-# issue 130020
-
-# ---
-# name: s_mov_b32_inlineimm_copy_s_to_av_32
-# tracksRegLiveness: true
-# body: |
-# bb.0:
-# %0:sreg_32 = S_MOV_B32 32
-# %1:av_32 = COPY %0
-# $agpr0 = COPY %1
-# S_ENDPGM 0
-
-# ...
-
-# ---
-# name: v_mov_b32_inlineimm_copy_v_to_av_32
-# tracksRegLiveness: true
-# body: |
-# bb.0:
-# %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
-# %1:av_32 = COPY %0
-# $agpr0 = COPY %1
-# S_ENDPGM 0
-# ...
-
-# ---
-# name: s_mov_b32_imm_literal_copy_s_to_av_32
-# tracksRegLiveness: true
-# body: |
-# bb.0:
-# %0:sreg_32 = S_MOV_B32 999
-# %1:av_32 = COPY %0
-# $agpr0 = COPY %1
-# S_ENDPGM 0
-
-# ...
-
-# ---
-# name: v_mov_b32_imm_literal_copy_v_to_av_32
-# tracksRegLiveness: true
-# body: |
-# bb.0:
-# %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-# %1:av_32 = COPY %0
-# $agpr0 = COPY %1
-# S_ENDPGM 0
-
-# ...