[llvm] [AMDGPU] Fix register class constraints for si-fold-operands pass when folding immediate into copies (PR #131387)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 26 11:07:22 PDT 2025


https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/131387

>From d2316eedc1508eabccbedb6df35f0da4d1cc9a38 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Fri, 14 Mar 2025 16:14:19 -0400
Subject: [PATCH] [AMDGPU] Fix register class constraints for si-fold-operands
 pass

Fixes #130020

This fixes an issue where the si-fold-operands pass would incorrectly
fold immediate values into COPY instructions targeting av_32 registers,
which is illegal.

The pass now checks that the register class defined by the selected MOV
opcode is equal to or a superclass of the COPY's destination register class
before attempting to fold the immediate.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  |  10 ++
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir | 197 +++++++++------------
 2 files changed, 89 insertions(+), 118 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 91df516b80857..deb57eac69be2 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1047,6 +1047,16 @@ void SIFoldOperandsImpl::foldOperand(
     if (MovOp == AMDGPU::COPY)
       return;
 
+    // Only fold when the result register class of the MOV (ResRC) is equal to
+    // or a superclass of the destination register class of the COPY (DestRC).
+    // If this condition does not hold, folding would be illegal.
+    const MCInstrDesc &MovDesc = TII->get(MovOp);
+    if (MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1) {
+      const TargetRegisterClass *ResRC = 
+          TRI->getRegClass(MovDesc.operands()[0].RegClass);
+      if (!DestRC -> hasSuperClassEq(ResRC)) return;
+    }
+
     MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
     MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
     while (ImpOpI != ImpOpE) {
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index dce11209d0514..8a260c6dfa45e 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -1,7 +1,5 @@
-# RUN: llc -mtriple=amdgcn -run-pass si-fold-operands -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
-
-# GCN-LABEL:       name: fold-imm-copy
-# GCN:             V_AND_B32_e32 65535
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -run-pass=si-fold-operands -verify-machineinstrs -o - %s | FileCheck %s
 
 ---
 name: fold-imm-copy
@@ -9,6 +7,17 @@ tracksRegLiveness: true
 body:             |
   bb.0:
     liveins: $vgpr0, $sgpr0_sgpr1
+    ; CHECK-LABEL: name: fold-imm-copy
+    ; CHECK: liveins: $vgpr0, $sgpr0_sgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_64 = COPY $sgpr0_sgpr1
+    ; CHECK-NEXT: [[S_LOAD_DWORDX4_IMM:%[0-9]+]]:sgpr_128 = S_LOAD_DWORDX4_IMM [[COPY1]], 9, 0
+    ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 2, [[COPY]], implicit $exec
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[V_LSHLREV_B32_e64_]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
+    ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_ADDR64_:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 [[REG_SEQUENCE]], [[S_LOAD_DWORDX4_IMM]], 0, 4, 0, 0, implicit $exec
+    ; CHECK-NEXT: [[V_AND_B32_e32_:%[0-9]+]]:vgpr_32 = V_AND_B32_e32 65535, [[BUFFER_LOAD_DWORD_ADDR64_]], implicit $exec
     %0:vgpr_32 = COPY $vgpr0
     %1:sgpr_64 = COPY $sgpr0_sgpr1
     %2:sgpr_128 = S_LOAD_DWORDX4_IMM %1, 9, 0
@@ -20,17 +29,22 @@ body:             |
     %8:sreg_32_xm0 = S_MOV_B32 65535
     %9:vgpr_32 = COPY %8
     %10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
+
 ...
 
 ---
-# GCN-LABEL:       name: no_extra_fold_on_same_opnd
-# The first XOR needs commuting to fold that immediate operand.
-# GCN:             V_XOR_B32_e32 0, %1
-# GCN:             V_XOR_B32_e32 %2, %4.sub0
 name: no_extra_fold_on_same_opnd
 tracksRegLiveness: true
 body:             |
   bb.0:
+    ; CHECK-LABEL: name: no_extra_fold_on_same_opnd
+    ; CHECK: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[DEF]], %subreg.sub0, killed [[V_MOV_B32_e32_]], %subreg.sub1
+    ; CHECK-NEXT: [[V_XOR_B32_e32_:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 0, [[DEF1]], implicit $exec
+    ; CHECK-NEXT: [[V_XOR_B32_e32_1:%[0-9]+]]:vgpr_32 = V_XOR_B32_e32 [[DEF2]], [[REG_SEQUENCE]].sub0, implicit $exec
     %0:vgpr_32 = IMPLICIT_DEF
     %1:vgpr_32 = IMPLICIT_DEF
     %2:vgpr_32 = IMPLICIT_DEF
@@ -38,20 +52,22 @@ body:             |
     %4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
     %5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
     %6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
+
 ...
 
 ---
 
 # Make sure the subreg index is not reinterpreted when folding
 # immediates
-#
-# GCN-LABEL: name: clear_subreg_imm_fold{{$}}
-# GCN: %1:sgpr_32 = S_MOV_B32 4294967288
-# GCN: %2:sgpr_32 = S_MOV_B32 4294967295
+
 name: clear_subreg_imm_fold
 tracksRegLiveness: true
 body:             |
   bb.0:
+    ; CHECK-LABEL: name: clear_subreg_imm_fold
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967288
+    ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sgpr_32 = S_MOV_B32 4294967295
+    ; CHECK-NEXT: S_ENDPGM 0, implicit [[S_MOV_B32_]], implicit [[S_MOV_B32_1]]
     %0:sreg_64 = S_MOV_B64 -8
     %1:sgpr_32 = COPY %0.sub0
     %2:sgpr_32 = COPY %0.sub1
@@ -59,28 +75,29 @@ body:             |
 
 ...
 
-# GCN-LABEL: name: no_fold_imm_into_m0{{$}}
-# GCN: %0:sreg_32 = S_MOV_B32 -8
-# GCN-NEXT: $m0 = COPY %0
-
 ---
 name: no_fold_imm_into_m0
 tracksRegLiveness: true
 body:             |
   bb.0:
+    ; CHECK-LABEL: name: no_fold_imm_into_m0
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 -8
+    ; CHECK-NEXT: $m0 = COPY [[S_MOV_B32_]]
+    ; CHECK-NEXT: S_ENDPGM 0, implicit $m0
     %0:sreg_32 = S_MOV_B32 -8
     $m0 = COPY %0
     S_ENDPGM 0, implicit $m0
 
 ...
 
-# GCN-LABEL: name: fold_sgpr_imm_to_vgpr_copy{{$}}
-# GCN: $vgpr0 = V_MOV_B32_e32 -8, implicit $exec
 ---
 name: fold_sgpr_imm_to_vgpr_copy
 tracksRegLiveness: true
 body:             |
   bb.0:
+    ; CHECK-LABEL: name: fold_sgpr_imm_to_vgpr_copy
+    ; CHECK: $vgpr0 = V_MOV_B32_e32 -8, implicit $exec
+    ; CHECK-NEXT: S_ENDPGM 0, implicit $vgpr0
     %0:sreg_32 = S_MOV_B32 -8
     $vgpr0 = COPY %0
     S_ENDPGM 0, implicit $vgpr0
@@ -90,16 +107,17 @@ body:             |
 # The users of $vgpr1 should not be visited for further immediate
 # folding.
 
-# GCN-LABEL: name: no_fold_physreg_users_vgpr{{$}}
-# GCN: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
-# GCN-NEXT: S_NOP 0, implicit-def $vgpr1
-# GCN-NEXT: %2:vgpr_32 = COPY $vgpr1
-# GCN-NEXT: $vgpr2 = COPY %2
 ---
 name: no_fold_physreg_users_vgpr
 tracksRegLiveness: true
 body:             |
   bb.0:
+    ; CHECK-LABEL: name: no_fold_physreg_users_vgpr
+    ; CHECK: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+    ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr1
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: $vgpr2 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:sreg_32 = S_MOV_B32 0
     %1:vgpr_32 = COPY %0
     $vgpr1 = COPY %0
@@ -111,126 +129,69 @@ body:             |
 ...
 
 ---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
+name: s_mov_b32_inlineimm_copy_s_to_av_32
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    $agpr0 = COPY %0
-    S_ENDPGM 0, implicit $agpr0
-
-...
-
----
-name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    $vgpr0 = COPY %0
-    S_ENDPGM 0, implicit $vgpr0
+    ; CHECK-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 32
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
 ...
 
 ---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
+name: v_mov_b32_inlineimm_copy_v_to_av_32
 tracksRegLiveness: true
 body:             |
-  bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
-    ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    %1:agpr_32 = COPY %0
-    S_ENDPGM 0, implicit %1
+ bb.0:
+    ; CHECK-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
+   %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+   %1:av_32 = COPY %0
+   $agpr0 = COPY %1
+   S_ENDPGM 0
 
 ...
 
 ---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
+name: s_mov_b32_imm_literal_copy_s_to_av_32
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    %1:vgpr_32 = COPY %0
-    S_ENDPGM 0, implicit %1
+    ; CHECK-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 999
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
 ...
 
 ---
-name: v_mov_b32_imm_literal_copy_v_to_agpr_32
+name: v_mov_b32_imm_literal_copy_v_to_av_32
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
-    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
-    ; GCN-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    %1:agpr_32 = COPY %0
+    %1:av_32 = COPY %0
     $agpr0 = COPY %1
     S_ENDPGM 0
 
 ...
-
-# FIXME: Register class restrictions of av register not respected,
-# issue 130020
-
-# ---
-# name: s_mov_b32_inlineimm_copy_s_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:sreg_32 = S_MOV_B32 32
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
-
-# ...
-
-# ---
-# name: v_mov_b32_inlineimm_copy_v_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#  bb.0:
-#    %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
-#    %1:av_32 = COPY %0
-#    $agpr0 = COPY %1
-#    S_ENDPGM 0
-# ...
-
-# ---
-# name: s_mov_b32_imm_literal_copy_s_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:sreg_32 = S_MOV_B32 999
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
-
-# ...
-
-# ---
-# name: v_mov_b32_imm_literal_copy_v_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
-
-# ...



More information about the llvm-commits mailing list