[llvm] [AMDGPU][True16][CodeGen] readfirstlane for vgpr16 copy to sgpr32 (PR #118037)

Mon May 5 09:14:53 PDT 2025

================
@@ -1086,10 +1086,22 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
         TRI->getRegClassForOperandReg(*MRI, MI->getOperand(1));
     size_t SrcSize = TRI->getRegSizeInBits(*SrcRC);
     if (SrcSize == 16) {
-      // HACK to handle possible 16bit VGPR source
-      auto MIB = BuildMI(*MBB, MI, MI->getDebugLoc(),
-                         TII->get(AMDGPU::V_READFIRSTLANE_B32), DstReg);
-      MIB.addReg(SrcReg, 0, AMDGPU::NoSubRegister);
+      assert(MF.getSubtarget<GCNSubtarget>().useRealTrue16Insts() &&
+             "We do not expect to see 16-bit copies from VGPR to SGPR unless "
+             "we have 16-bit VGPRs");
+      assert(MRI->getRegClass(DstReg) == &AMDGPU::SGPR_LO16RegClass ||
+             MRI->getRegClass(DstReg) == &AMDGPU::SReg_32RegClass ||
+             MRI->getRegClass(DstReg) == &AMDGPU::SReg_32_XM0RegClass);
+      // There is no V_READFIRSTLANE_B16, so legalize the dst/src reg to 32 bits
+      MRI->setRegClass(DstReg, &AMDGPU::SReg_32_XM0RegClass);
+      Register VReg32 = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      const DebugLoc &DL = MI->getDebugLoc();
+      BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::SUBREG_TO_REG), VReg32)
+          .addImm(0)
+          .addReg(SrcReg, 0)
+          .addImm(AMDGPU::lo16);
----------------
arsenm wrote:

SUBREG_TO_REG is a malformed operation and we should not use it. Either use an IMPLICIT_DEF + an INSERT_SUBREG or REG_SEQUENCE 

https://github.com/llvm/llvm-project/pull/118037