[llvm] cfc74dd - AMDGPU: Constrain readfirstlane operand when writing to m0 (#168004)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 14 09:18:48 PST 2025
Author: Matt Arsenault
Date: 2025-11-14T17:18:43Z
New Revision: cfc74dddeffd3e53c7fdb90593db01a01cffda8f
URL: https://github.com/llvm/llvm-project/commit/cfc74dddeffd3e53c7fdb90593db01a01cffda8f
DIFF: https://github.com/llvm/llvm-project/commit/cfc74dddeffd3e53c7fdb90593db01a01cffda8f.diff
LOG: AMDGPU: Constrain readfirstlane operand when writing to m0 (#168004)
Fixes another verifier error after introducing AV registers.
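Also fixes a bug where the subregister index on the copy's source
operand, if there was one, was not cleared after the operand was
replaced with the readfirstlane result.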
Added:
Modified:
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 1e3562b37d87c..e1647b76702c4 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -902,14 +902,28 @@ bool SIFixSGPRCopies::lowerSpecialCase(MachineInstr &MI,
// really much we can do to fix this.
// Some special instructions use M0 as an input. Some even only use
// the first lane. Insert a readfirstlane and hope for the best.
- if (DstReg == AMDGPU::M0 &&
- TRI->hasVectorRegisters(MRI->getRegClass(SrcReg))) {
+ const TargetRegisterClass *SrcRC = MRI->getRegClass(SrcReg);
+ if (DstReg == AMDGPU::M0 && TRI->hasVectorRegisters(SrcRC)) {
Register TmpReg =
MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
- BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
- TII->get(AMDGPU::V_READFIRSTLANE_B32), TmpReg)
+
+ const MCInstrDesc &ReadFirstLaneDesc =
+ TII->get(AMDGPU::V_READFIRSTLANE_B32);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), ReadFirstLaneDesc, TmpReg)
.add(MI.getOperand(1));
+
+ unsigned SubReg = MI.getOperand(1).getSubReg();
MI.getOperand(1).setReg(TmpReg);
+ MI.getOperand(1).setSubReg(AMDGPU::NoSubRegister);
+
+ const TargetRegisterClass *OpRC = TII->getRegClass(ReadFirstLaneDesc, 1);
+ const TargetRegisterClass *ConstrainRC =
+ SubReg == AMDGPU::NoSubRegister
+ ? OpRC
+ : TRI->getMatchingSuperRegClass(SrcRC, OpRC, SubReg);
+
+ if (!MRI->constrainRegClass(SrcReg, ConstrainRC))
+ llvm_unreachable("failed to constrain register");
} else if (tryMoveVGPRConstToSGPR(MI.getOperand(1), DstReg, MI.getParent(),
MI, MI.getDebugLoc())) {
I = std::next(I);
diff --git a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
index b05b89fe503f2..116f46df01049 100644
--- a/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
+++ b/llvm/test/CodeGen/AMDGPU/fix-sgpr-copies-readfirstlane-av-register-regression.ll
@@ -49,4 +49,19 @@ bb16: ; preds = %bb16, %bb
br label %bb16
}
-
+define void @av_class_to_m0(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: av_class_to_m0:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: global_load_dword v0, v[0:1], off
+; CHECK-NEXT: s_waitcnt vmcnt(0)
+; CHECK-NEXT: v_readfirstlane_b32 s4, v0
+; CHECK-NEXT: s_mov_b32 m0, s4
+; CHECK-NEXT: ;;#ASMSTART
+; CHECK-NEXT: ; use m0
+; CHECK-NEXT: ;;#ASMEND
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+ %load = load i32, ptr addrspace(1) %ptr
+ call void asm sideeffect "; use $0", "{m0}"(i32 %load)
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
index ac4f41282ab73..03e3ff95bbad2 100644
--- a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies-av-constrain.mir
@@ -90,3 +90,22 @@ body: |
S_ENDPGM 0
...
+---
+name: constrain_readfirstlane_av64_subreg_m0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: constrain_readfirstlane_av64_subreg_m0
+ ; CHECK: liveins: $vgpr0_vgpr1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; CHECK-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY]].sub0, implicit $exec
+ ; CHECK-NEXT: $m0 = COPY [[V_READFIRSTLANE_B32_]]
+ %0:sreg_32 = IMPLICIT_DEF
+ %1:av_64 = COPY $vgpr0_vgpr1
+ $m0 = COPY %1.sub0
+...
+
More information about the llvm-commits
mailing list