[llvm] [AMDGPU][True16][CodeGen] legalize 16bit and 32bit use-def chain for moveToVALU in si-fix-sgpr-lowering (PR #138734)

Mon May 12 11:25:53 PDT 2025

================
@@ -7225,24 +7225,44 @@ bool SIInstrWorklist::isDeferred(MachineInstr *MI) {
   return DeferredList.contains(MI);
 }
 
-// 16bit SALU use sgpr32. If a 16bit SALU get lowered to VALU in true16 mode,
-// sgpr32 is replaced to vgpr32 which is illegal in t16 inst. Need to add
-// subreg access properly. This can be removed after we have sgpr16 in place
-void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &Inst,
+// Legalize operands between 16bit and 32bit registers in v2s copy
+// lowering (change sgpr to vgpr).
+// This is mainly caused by 16bit SALU and 16bit VALU using regs of different
+// sizes. Need to legalize the size of the operands during the vgpr lowering
+// chain. This can be removed after we have sgpr16 in place.
+void SIInstrInfo::legalizeOperandsVALUt16(MachineInstr &MI,
                                           MachineRegisterInfo &MRI) const {
-  unsigned Opcode = Inst.getOpcode();
-  if (!AMDGPU::isTrue16Inst(Opcode) || !ST.useRealTrue16Insts())
+  if (!ST.useRealTrue16Insts())
     return;
 
-  for (MachineOperand &Op : Inst.explicit_operands()) {
+  unsigned Opcode = MI.getOpcode();
+  MachineBasicBlock *MBB = MI.getParent();
+
+  // legalize operands and check for size mismatch
+  for (MachineOperand &Op : MI.explicit_operands()) {
     unsigned OpIdx = Op.getOperandNo();
     if (!OpIdx)
       continue;
-    if (Op.isReg() && RI.isVGPR(MRI, Op.getReg())) {
+    if (Op.isReg() && Op.getReg().isVirtual() && RI.isVGPR(MRI, Op.getReg())) {
       unsigned RCID = get(Opcode).operands()[OpIdx].RegClass;
-      const TargetRegisterClass *RC = RI.getRegClass(RCID);
-      if (RI.getRegSizeInBits(*RC) == 16) {
+      const TargetRegisterClass *ExpectedRC = RI.getRegClass(RCID);
+      const TargetRegisterClass *RC = MRI.getRegClass(Op.getReg());
+      if (32 == RI.getRegSizeInBits(*RC) &&
+          16 == RI.getRegSizeInBits(*ExpectedRC)) {
         Op.setSubReg(AMDGPU::lo16);
+      } else if (16 == RI.getRegSizeInBits(*RC) &&
+                 32 == RI.getRegSizeInBits(*ExpectedRC)) {
----------------
broxigarchen wrote:

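Hoist the register class size lookups (`RI.getRegSizeInBits(*RC)` and `RI.getRegSizeInBits(*ExpectedRC)`) into local variables so they are not repeated in each branch of the if/else-if.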

https://github.com/llvm/llvm-project/pull/138734