[llvm] [AMDGPU] Fix register class constraints for si-fold-operands pass when folding immediate into copies (PR #131387)

via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 15 12:01:38 PDT 2025


https://github.com/mssefat updated https://github.com/llvm/llvm-project/pull/131387

>From e2e886dd35bf524f7ab11d3285701b5dbf549a37 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Fri, 14 Mar 2025 16:14:19 -0400
Subject: [PATCH 1/3] [AMDGPU] Fix register class constraints for
 si-fold-operands pass

Fixes #130020

This fixes an issue where the si-fold-operands pass would incorrectly
fold immediate values into COPY instructions targeting av_32 registers,
which is illegal.

The pass now properly checks register class constraints before attempting to
fold the immediates.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  |  10 ++
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir | 136 +++++++--------------
 2 files changed, 51 insertions(+), 95 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index d6acf9e081b9f..14d1a5de7da6c 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1068,6 +1068,16 @@ void SIFoldOperandsImpl::foldOperand(
     if (MovOp == AMDGPU::COPY)
       return;
 
+    // Fold if the destination register class of the MOV instruction (ResRC)
+    // is a superclass of (or equal to) the destination register class of the COPY (DestRC).
+    // If this condition fails, folding would be illegal.
+    const MCInstrDesc &MovDesc = TII->get(MovOp);
+    if (MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1) {
+      const TargetRegisterClass *ResRC = 
+          TRI->getRegClass(MovDesc.operands()[0].RegClass);
+      if (!DestRC -> hasSuperClassEq(ResRC)) return;
+    }
+
     MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
     MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
     while (ImpOpI != ImpOpE) {
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index 300bae7551ca5..83832a64e4791 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -29,6 +29,7 @@ body:             |
     %8:sreg_32_xm0 = S_MOV_B32 65535
     %9:vgpr_32 = COPY %8
     %10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
+
 ...
 
 ---
@@ -52,6 +53,7 @@ body:             |
     %4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
     %5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
     %6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
+
 ...
 
 ---
@@ -59,6 +61,7 @@ body:             |
 # Make sure the subreg index is not reinterpreted when folding
 # immediates
 #
+
 name: clear_subreg_imm_fold
 tracksRegLiveness: true
 body:             |
@@ -128,126 +131,69 @@ body:             |
 ...
 
 ---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
+name: s_mov_b32_inlineimm_copy_s_to_av_32
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    $agpr0 = COPY %0
-    S_ENDPGM 0, implicit $agpr0
-
-...
-
----
-name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
-tracksRegLiveness: true
-body:             |
-  bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
-    ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    ; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
-    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    $vgpr0 = COPY %0
-    S_ENDPGM 0, implicit $vgpr0
+    ; CHECK-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 32
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
 ...
 
 ---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
+name: v_mov_b32_inlineimm_copy_v_to_av_32
 tracksRegLiveness: true
 body:             |
-  bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
-    ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    %1:agpr_32 = COPY %0
-    S_ENDPGM 0, implicit %1
+ bb.0:
+    ; CHECK-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
+   %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+   %1:av_32 = COPY %0
+   $agpr0 = COPY %1
+   S_ENDPGM 0
 
 ...
 
 ---
-name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
+name: s_mov_b32_imm_literal_copy_s_to_av_32
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
-    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
-    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
-    %1:vgpr_32 = COPY %0
-    S_ENDPGM 0, implicit %1
+    ; CHECK-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
+    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 999
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
 ...
 
 ---
-name: v_mov_b32_imm_literal_copy_v_to_agpr_32
+name: v_mov_b32_imm_literal_copy_v_to_av_32
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
-    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
-    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
-    ; GCN-NEXT: S_ENDPGM 0
+    ; CHECK-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
+    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
+    ; CHECK-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    %1:agpr_32 = COPY %0
+    %1:av_32 = COPY %0
     $agpr0 = COPY %1
     S_ENDPGM 0
 
 ...
-
-# FIXME: Register class restrictions of av register not respected,
-# issue 130020
-
-# ---
-# name: s_mov_b32_inlineimm_copy_s_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:sreg_32 = S_MOV_B32 32
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
-
-# ...
-
-# ---
-# name: v_mov_b32_inlineimm_copy_v_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#  bb.0:
-#    %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
-#    %1:av_32 = COPY %0
-#    $agpr0 = COPY %1
-#    S_ENDPGM 0
-# ...
-
-# ---
-# name: s_mov_b32_imm_literal_copy_s_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:sreg_32 = S_MOV_B32 999
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
-
-# ...
-
-# ---
-# name: v_mov_b32_imm_literal_copy_v_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
-
-# ...

>From 26a3099f45b6760ac6422545ded16c6605438bf8 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Tue, 15 Apr 2025 10:56:10 -0400
Subject: [PATCH 2/3] Removed changes for fold-imm-copy.mir and created an NFC
 change for it in a separate PR

---
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir | 142 +++++++++++++++------
 1 file changed, 101 insertions(+), 41 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index 83832a64e4791..b56109962c762 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -29,7 +29,6 @@ body:             |
     %8:sreg_32_xm0 = S_MOV_B32 65535
     %9:vgpr_32 = COPY %8
     %10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
-
 ...
 
 ---
@@ -53,7 +52,6 @@ body:             |
     %4:vreg_64 = REG_SEQUENCE killed %0, %subreg.sub0, killed %3, %subreg.sub1
     %5:vgpr_32 = V_XOR_B32_e32 %1, %4.sub1, implicit $exec
     %6:vgpr_32 = V_XOR_B32_e32 %2, %4.sub0, implicit $exec
-
 ...
 
 ---
@@ -61,7 +59,6 @@ body:             |
 # Make sure the subreg index is not reinterpreted when folding
 # immediates
 #
-
 name: clear_subreg_imm_fold
 tracksRegLiveness: true
 body:             |
@@ -77,6 +74,10 @@ body:             |
 
 ...
 
+# GCN-LABEL: name: no_fold_imm_into_m0{{$}}
+# GCN: %0:sreg_32 = S_MOV_B32 -8
+# GCN-NEXT: $m0 = COPY %0
+
 ---
 name: no_fold_imm_into_m0
 tracksRegLiveness: true
@@ -92,6 +93,8 @@ body:             |
 
 ...
 
+# GCN-LABEL: name: fold_sgpr_imm_to_vgpr_copy{{$}}
+# GCN: $vgpr0 = V_MOV_B32_e32 -8, implicit $exec
 ---
 name: fold_sgpr_imm_to_vgpr_copy
 tracksRegLiveness: true
@@ -131,69 +134,126 @@ body:             |
 ...
 
 ---
-name: s_mov_b32_inlineimm_copy_s_to_av_32
+name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; CHECK-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
-    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sreg_32 = S_MOV_B32 32
-    %1:av_32 = COPY %0
-    $agpr0 = COPY %1
-    S_ENDPGM 0
+    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_agpr
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: $agpr0 = COPY [[AV_MOV_]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0
+    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+    $agpr0 = COPY %0
+    S_ENDPGM 0, implicit $agpr0
 
 ...
 
 ---
-name: v_mov_b32_inlineimm_copy_v_to_av_32
+name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
 tracksRegLiveness: true
 body:             |
- bb.0:
-    ; CHECK-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
-    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
-    ; CHECK-NEXT: S_ENDPGM 0
-   %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
-   %1:av_32 = COPY %0
-   $agpr0 = COPY %1
-   S_ENDPGM 0
+  bb.0:
+    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_physreg_vgpr
+    ; GCN: [[AV_MOV_:%[0-9]+]]:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+    ; GCN-NEXT: $vgpr0 = COPY [[AV_MOV_]]
+    ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0
+    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+    $vgpr0 = COPY %0
+    S_ENDPGM 0, implicit $vgpr0
 
 ...
 
 ---
-name: s_mov_b32_imm_literal_copy_s_to_av_32
+name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; CHECK-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
-    ; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
-    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
-    ; CHECK-NEXT: S_ENDPGM 0
-    %0:sreg_32 = S_MOV_B32 999
-    %1:av_32 = COPY %0
-    $agpr0 = COPY %1
-    S_ENDPGM 0
+    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_agpr
+    ; GCN: [[V_ACCVGPR_WRITE_B32_e64_:%[0-9]+]]:agpr_32 = V_ACCVGPR_WRITE_B32_e64 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_ACCVGPR_WRITE_B32_e64_]]
+    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+    %1:agpr_32 = COPY %0
+    S_ENDPGM 0, implicit %1
 
 ...
 
 ---
-name: v_mov_b32_imm_literal_copy_v_to_av_32
+name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
 tracksRegLiveness: true
 body:             |
   bb.0:
-    ; CHECK-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
-    ; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    ; CHECK-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
-    ; CHECK-NEXT: $agpr0 = COPY [[COPY]]
-    ; CHECK-NEXT: S_ENDPGM 0
+    ; GCN-LABEL: name: av_mov_b32_imm_pseudo_copy_av_32_to_virtreg_vgpr
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: S_ENDPGM 0, implicit [[V_MOV_B32_e32_]]
+    %0:av_32 = AV_MOV_B32_IMM_PSEUDO 0, implicit $exec
+    %1:vgpr_32 = COPY %0
+    S_ENDPGM 0, implicit %1
+
+...
+
+---
+name: v_mov_b32_imm_literal_copy_v_to_agpr_32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_agpr_32
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:agpr_32 = COPY [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+    ; GCN-NEXT: S_ENDPGM 0
     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-    %1:av_32 = COPY %0
+    %1:agpr_32 = COPY %0
     $agpr0 = COPY %1
     S_ENDPGM 0
 
 ...
+
+# FIXME: Register class restrictions of av register not respected,
+# issue 130020
+
+# ---
+# name: s_mov_b32_inlineimm_copy_s_to_av_32
+# tracksRegLiveness: true
+# body:             |
+#   bb.0:
+#     %0:sreg_32 = S_MOV_B32 32
+#     %1:av_32 = COPY %0
+#     $agpr0 = COPY %1
+#     S_ENDPGM 0
+
+# ...
+
+# ---
+# name: v_mov_b32_inlineimm_copy_v_to_av_32
+# tracksRegLiveness: true
+# body:             |
+#  bb.0:
+#    %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+#    %1:av_32 = COPY %0
+#    $agpr0 = COPY %1
+#    S_ENDPGM 0
+# ...
+
+# ---
+# name: s_mov_b32_imm_literal_copy_s_to_av_32
+# tracksRegLiveness: true
+# body:             |
+#   bb.0:
+#     %0:sreg_32 = S_MOV_B32 999
+#     %1:av_32 = COPY %0
+#     $agpr0 = COPY %1
+#     S_ENDPGM 0
+
+# ...
+
+# ---
+# name: v_mov_b32_imm_literal_copy_v_to_av_32
+# tracksRegLiveness: true
+# body:             |
+#   bb.0:
+#     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+#     %1:av_32 = COPY %0
+#     $agpr0 = COPY %1
+#     S_ENDPGM 0
+
+# ...

>From 547d52f6e038a0b42fed349875f37cacc8818921 Mon Sep 17 00:00:00 2001
From: mssefat <syadus.sefat at gmail.com>
Date: Tue, 15 Apr 2025 14:57:05 -0400
Subject: [PATCH 3/3] Fixes #130020

This fixes an issue where the si-fold-operands pass would incorrectly
fold immediate values into COPY instructions targeting av_32 registers,
which is illegal.

The pass now properly checks register class constraints before attempting to
fold the immediates.
---
 llvm/lib/Target/AMDGPU/SIFoldOperands.cpp  |  20 ++--
 llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir | 102 ++++++++++++---------
 2 files changed, 72 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 14d1a5de7da6c..77d3b09d469b0 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1068,15 +1068,19 @@ void SIFoldOperandsImpl::foldOperand(
     if (MovOp == AMDGPU::COPY)
       return;
 
-    // Fold if the destination register class of the MOV instruction (ResRC)
-    // is a superclass of (or equal to) the destination register class of the COPY (DestRC).
-    // If this condition fails, folding would be illegal.
+    // Check for a common register subclass between destination (DestRC) and
+    // MOV result (ResRC). If one exists, verify that this common subclass is a
+    // superclass of (or equal to) the destination register class; otherwise
+    // folding is illegal.
+
     const MCInstrDesc &MovDesc = TII->get(MovOp);
-    if (MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1) {
-      const TargetRegisterClass *ResRC = 
-          TRI->getRegClass(MovDesc.operands()[0].RegClass);
-      if (!DestRC -> hasSuperClassEq(ResRC)) return;
-    }
+    assert(MovDesc.getNumDefs() > 0 && MovDesc.operands()[0].RegClass != -1);
+    const TargetRegisterClass *ResRC =
+        TRI->getRegClass(MovDesc.operands()[0].RegClass);
+    const TargetRegisterClass *CommonRC = TRI->getCommonSubClass(DestRC, ResRC);
+
+    if (!CommonRC || !DestRC->hasSuperClassEq(CommonRC))
+      return;
 
     MachineInstr::mop_iterator ImpOpI = UseMI->implicit_operands().begin();
     MachineInstr::mop_iterator ImpOpE = UseMI->implicit_operands().end();
diff --git a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
index b56109962c762..c9d2f8ecd8fb4 100644
--- a/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/fold-imm-copy.mir
@@ -208,52 +208,70 @@ body:             |
 
 ...
 
-# FIXME: Register class restrictions of av register not respected,
-# issue 130020
+---
+name: s_mov_b32_inlineimm_copy_s_to_av_32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_mov_b32_inlineimm_copy_s_to_av_32
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 32
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 32
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
-# ---
-# name: s_mov_b32_inlineimm_copy_s_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:sreg_32 = S_MOV_B32 32
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
+...
 
-# ...
+---
+name: v_mov_b32_inlineimm_copy_v_to_av_32
+tracksRegLiveness: true
+body:             |
+ bb.0:
+    ; GCN-LABEL: name: v_mov_b32_inlineimm_copy_v_to_av_32
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+    ; GCN-NEXT: S_ENDPGM 0
+   %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
+   %1:av_32 = COPY %0
+   $agpr0 = COPY %1
+   S_ENDPGM 0
 
-# ---
-# name: v_mov_b32_inlineimm_copy_v_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#  bb.0:
-#    %0:vgpr_32 = V_MOV_B32_e32 32, implicit $exec
-#    %1:av_32 = COPY %0
-#    $agpr0 = COPY %1
-#    S_ENDPGM 0
-# ...
+...
 
-# ---
-# name: s_mov_b32_imm_literal_copy_s_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:sreg_32 = S_MOV_B32 999
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
+---
+name: s_mov_b32_imm_literal_copy_s_to_av_32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: s_mov_b32_imm_literal_copy_s_to_av_32
+    ; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 999
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[S_MOV_B32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:sreg_32 = S_MOV_B32 999
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
-# ...
+...
 
-# ---
-# name: v_mov_b32_imm_literal_copy_v_to_av_32
-# tracksRegLiveness: true
-# body:             |
-#   bb.0:
-#     %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
-#     %1:av_32 = COPY %0
-#     $agpr0 = COPY %1
-#     S_ENDPGM 0
+---
+name: v_mov_b32_imm_literal_copy_v_to_av_32
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: v_mov_b32_imm_literal_copy_v_to_av_32
+    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:av_32 = COPY [[V_MOV_B32_e32_]]
+    ; GCN-NEXT: $agpr0 = COPY [[COPY]]
+    ; GCN-NEXT: S_ENDPGM 0
+    %0:vgpr_32 = V_MOV_B32_e32 999, implicit $exec
+    %1:av_32 = COPY %0
+    $agpr0 = COPY %1
+    S_ENDPGM 0
 
-# ...
+...



More information about the llvm-commits mailing list