[llvm] 4dee305 - AMDGPU: Fix foldImmediate breaking register class constraints (#127481)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 17 19:34:51 PST 2025


Author: Matt Arsenault
Date: 2025-02-18T10:34:48+07:00
New Revision: 4dee305ce2c92fbffd51ac1948e5916bccf2c9cb

URL: https://github.com/llvm/llvm-project/commit/4dee305ce2c92fbffd51ac1948e5916bccf2c9cb
DIFF: https://github.com/llvm/llvm-project/commit/4dee305ce2c92fbffd51ac1948e5916bccf2c9cb.diff

LOG: AMDGPU: Fix foldImmediate breaking register class constraints (#127481)

This fixes a verifier error when folding an immediate that was materialized
into a register of an aligned VGPR class into a COPY whose destination is an
unaligned virtual register.
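
The core of the change is the usual pattern for rewriting an instruction to a
new opcode in place: the destination register must satisfy the new opcode's
def register class. A physical register can only be checked for containment,
while a virtual register can additionally be narrowed with
MachineRegisterInfo::constrainRegClass, which returns null when the existing
and required classes are incompatible. A minimal sketch of that pattern
follows; the helper name canRewriteDefTo and its parameter plumbing are
illustrative and not part of the patch, only the LLVM APIs it calls are real:

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"
    #include "llvm/CodeGen/TargetInstrInfo.h"
    #include "llvm/CodeGen/TargetRegisterInfo.h"

    using namespace llvm;

    // Sketch of the register-class check the patch adds (names illustrative):
    // may DstReg remain the def operand if the instruction is rewritten to
    // NewOpc?
    static bool canRewriteDefTo(const TargetInstrInfo &TII,
                                const TargetRegisterInfo &TRI,
                                MachineRegisterInfo &MRI,
                                const MachineFunction &MF, Register DstReg,
                                unsigned NewOpc) {
      const MCInstrDesc &NewMCID = TII.get(NewOpc);
      const TargetRegisterClass *NewDefRC =
          TII.getRegClass(NewMCID, /*OpNum=*/0, &TRI, MF);

      if (DstReg.isPhysical())
        return NewDefRC->contains(DstReg); // physical regs: check only

      // Virtual register: constrain its class to the required one, or report
      // failure so the caller skips the fold.
      return MRI.constrainRegClass(DstReg, NewDefRC) != nullptr;
    }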

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 8481c6333f479..f51527d0eb148 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3473,14 +3473,19 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       assert(UseMI.getOperand(1).getReg().isVirtual());
     }
 
+    MachineFunction *MF = UseMI.getMF();
     const MCInstrDesc &NewMCID = get(NewOpc);
-    if (DstReg.isPhysical() &&
-        !RI.getRegClass(NewMCID.operands()[0].RegClass)->contains(DstReg))
+    const TargetRegisterClass *NewDefRC = getRegClass(NewMCID, 0, &RI, *MF);
+
+    if (DstReg.isPhysical()) {
+      if (!NewDefRC->contains(DstReg))
+        return false;
+    } else if (!MRI->constrainRegClass(DstReg, NewDefRC))
       return false;
 
     UseMI.setDesc(NewMCID);
     UseMI.getOperand(1).ChangeToImmediate(Imm.getSExtValue());
-    UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
+    UseMI.addImplicitDefUseOperands(*MF);
     return true;
   }
 

diff --git a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
index cceed6fd008e4..227af34f3fa6f 100644
--- a/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
+++ b/llvm/test/CodeGen/AMDGPU/peephole-fold-imm.mir
@@ -419,25 +419,30 @@ body:             |
 
 ...
 
-# FIXME:
-# ---
-# name:            fold_v_mov_b64_64_to_unaligned
-# body:             |
-#   bb.0:
-#     %0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
-#     %1:vreg_64 = COPY killed %0
-#     SI_RETURN_TO_EPILOG implicit %1
-# ...
-
-# FIXME:
-# ---
-# name:            fold_v_mov_b64_pseudo_64_to_unaligned
-# body:             |
-#   bb.0:
-#     %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
-#     %1:vreg_64 = COPY killed %0
-#     SI_RETURN_TO_EPILOG implicit %1
-# ...
+---
+name:            fold_v_mov_b64_64_to_unaligned
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: fold_v_mov_b64_64_to_unaligned
+    ; GCN: [[V_MOV_B64_e32_:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
+    %0:vreg_64_align2 = V_MOV_B64_e32 1311768467750121200, implicit $exec
+    %1:vreg_64 = COPY killed %0
+    SI_RETURN_TO_EPILOG implicit %1
+...
+
+---
+name:            fold_v_mov_b64_pseudo_64_to_unaligned
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: fold_v_mov_b64_pseudo_64_to_unaligned
+    ; GCN: [[V_MOV_B:%[0-9]+]]:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    ; GCN-NEXT: SI_RETURN_TO_EPILOG implicit [[V_MOV_B]]
+    %0:vreg_64_align2 = V_MOV_B64_PSEUDO 1311768467750121200, implicit $exec
+    %1:vreg_64 = COPY killed %0
+    SI_RETURN_TO_EPILOG implicit %1
+...
 
 ---
 name:            fold_s_brev_b32_simm_virtual_0


        

