[llvm] [AMDGPU][True16][CodeGen] readfirstlane for vgpr16 copy to sgpr32 (PR #118037)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon May 5 09:14:53 PDT 2025
================
@@ -1086,10 +1086,22 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
TRI->getRegClassForOperandReg(*MRI, MI->getOperand(1));
size_t SrcSize = TRI->getRegSizeInBits(*SrcRC);
if (SrcSize == 16) {
- // HACK to handle possible 16bit VGPR source
- auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
- TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg);
- MIB.addReg(SrcReg, 0, AMDGPU::NoSubRegister);
+ assert(MF.getSubtarget<GCNSubtarget>().useRealTrue16Insts() &&
+ "We do not expect to see 16-bit copies from VGPR to SGPR unless "
+ "we have 16-bit VGPRs");
+ assert(MRI->getRegClass(DstReg) == &AMDGPU::SGPR_LO16RegClass ||
+ MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
+ MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
+ // There is no V_READFIRSTLANE_B16, so legalize the dst/src reg to 32 bits
+ MRI->setRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+ Register VReg32 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ const DebugLoc &DL = MI->getDebugLoc();
+ BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), VReg32)
+ .addImm(0)
+ .addReg(SrcReg, 0)
+ .addImm(AMDGPU::lo16);
----------------
arsenm wrote:
SUBREG_TO_REG is a malformed operation and we should not use it. Use either an IMPLICIT_DEF + an INSERT_SUBREG, or a REG_SEQUENCE.
https://github.com/llvm/llvm-project/pull/118037
More information about the llvm-commits mailing list