[llvm] [AMDGPU][True16][CodeGen] S_PACK_XX_B32_B16 lowering for true16 mode (PR #162389)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 15 07:58:16 PDT 2025
================
@@ -9072,6 +9115,65 @@ void SIInstrInfo::movePackToVALU(SIInstrWorklist &Worklist,
MachineOperand &Src1 = Inst.getOperand(2);
const DebugLoc &DL = Inst.getDebugLoc();
+ if (ST.useRealTrue16Insts()) {
+ Register SrcReg0, SrcReg1;
+ if (!Src0.isReg() || (Src0.isReg() && !RI.isVGPR(MRI, Src0.getReg()))) {
+ SrcReg0 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg0).add(Src0);
+ } else
+ SrcReg0 = Src0.getReg();
+
+ if (!Src1.isReg() || (Src1.isReg() && !RI.isVGPR(MRI, Src1.getReg()))) {
+ SrcReg1 = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), SrcReg1).add(Src1);
+ } else
+ SrcReg1 = Src1.getReg();
+
+ bool isSrc0Reg16 = MRI.constrainRegClass(SrcReg0, &AMDGPU::VGPR_16RegClass);
+ bool isSrc1Reg16 = MRI.constrainRegClass(SrcReg1, &AMDGPU::VGPR_16RegClass);
----------------
broxigarchen wrote:
For the dst and operands of S_FMAC_F16, I think they are all f16 in ISel, but they are placed in an sreg32. Wouldn't it be safe to drop the zeroed top bits when it is moved to a VALU16?
We can definitely create a vgpr16 and then reg_sequence a vgpr32 on top of it, but these will eventually be removed anyway.
https://github.com/llvm/llvm-project/pull/162389
More information about the llvm-commits mailing list