[llvm] cfc74dd - AMDGPU: Constrain readfirstlane operand when writing to m0 (#168004)

via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 14 09:18:48 PST 2025


Author: Matt Arsenault
Date: 2025-11-14T17:18:43Z
New Revision: cfc74dddeffd3e53c7fdb90593db01a01cffda8f

URL: https://github.com/llvm/llvm-project/commit/cfc74dddeffd3e53c7fdb90593db01a01cffda8f
DIFF: https://github.com/llvm/llvm-project/commit/cfc74dddeffd3e53c7fdb90593db01a01cffda8f.diff

LOG: AMDGPU: Constrain readfirstlane operand when writing to m0 (#168004)

Fixes another verifier error after introducing AV registers.
Also clears the subregister index on the rewritten copy source
operand, which was previously left in place if there was one.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
    llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
    llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 1e3562b37d87c..e1647b76702c4 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -902,14 +902,28 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
     // really much we can do to fix this.
     // Some special instructions use M0 as an input. Some even only use
     // the first lane. Insert a readfirstlane and hope for the best.
-    if (DstReg == AMDGPU::M0 &&
-        TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
+    const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
+    if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
       Register TmpReg =
           MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
-      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
-              TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+
+      const MCInstrDesc &ReadFirstLaneDesc =
+          TII->get(AMDGPU::V_READFIRSTLANE_B32);
+      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
           .add(MI.getOperand(1));
+
+      unsigned SubReg = MI.getOperand(1).getSubReg();
       MI.getOperand(1).setReg(TmpReg);
+      MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
+
+      const TargetRegisterClass *OpRC = TII->getRegClass(ReadFirstLaneDesc, 1);
+      const TargetRegisterClass *ConstrainRC =
+          SubReg == AMDGPU::NoSubRegister
+              ? OpRC
+              : TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
+
+      if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
+        llvm_unreachable("failed to constrain register");
     } else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
                                       MI, MI.getDebugLoc())) {
       I = std::next(I);

diff  --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
index b05b89fe503f2..116f46df01049 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
@@ -49,4 +49,19 @@ bb16:                                             ; preds = %bb16, %bb
   br label %bb16
 }
 
-
+define void @av_class_to_m0(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: av_class_to_m0:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    global_load_dword v0, v[0:1], off
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    v_readfirstlane_b32 s4, v0
+; CHECK-NEXT:    s_mov_b32 m0, s4
+; CHECK-NEXT:    ;;#ASMSTART
+; CHECK-NEXT:    ; use m0
+; CHECK-NEXT:    ;;#ASMEND
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %load = load i32, ptr addrspace(1) %ptr
+  call void asm sideeffect "; use $0", "{m0}"(i32 %load)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
index ac4f41282ab73..03e3ff95bbad2 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
@@ -90,3 +90,22 @@ body:             |
     S_ENDPGM 0
 ...
 
+---
+name:            constrain_readfirstlane_av64_subreg_m0
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: constrain_readfirstlane_av64_subreg_m0
+    ; CHECK: liveins: $vgpr0_vgpr1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+    ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
+    ; CHECK-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_]]
+    %0:sreg_32 = IMPLICIT_DEF
+    %1:av_64 = COPY $vgpr0_vgpr1
+    $m0 = COPY %1.sub0
+...
+


        


More information about the llvm-commits mailing list