[llvm] [AMDGPU] misched: avoid subregister dependencies (PR #140255)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Tue Dec 2 17:54:29 PST 2025


================
@@ -627,6 +627,124 @@ GCNSubtarget::getMaxNumVectorRegs(const Function &F) const {
   return std::pair(MaxNumVGPRs, MaxNumAGPRs);
 }
 
+// Check to which source operand UseOpIdx points to and return a pointer to the
+// operand of the corresponding source modifier.
+// Return nullptr if UseOpIdx either doesn't point to src0/1/2 or if there is no
+// operand for the corresponding source modifier.
+static const MachineOperand *
+getVOP3PSourceModifierFromOpIdx(const MachineInstr *UseI, int UseOpIdx,
+                                const SIInstrInfo &InstrInfo) {
+  AMDGPU::OpName UseModName;
+  AMDGPU::OpName UseName =
+      AMDGPU::getOperandIdxName(UseI->getOpcode(), UseOpIdx);
+  switch (UseName) {
+  case AMDGPU::OpName::src0:
+    UseModName = AMDGPU::OpName::src0_modifiers;
+    break;
+  case AMDGPU::OpName::src1:
+    UseModName = AMDGPU::OpName::src1_modifiers;
+    break;
+  case AMDGPU::OpName::src2:
+    UseModName = AMDGPU::OpName::src2_modifiers;
+    break;
+  default:
+    return nullptr;
+  }
+  return InstrInfo.getNamedOperand(*UseI, UseModName);
+}
+
+// Get the subreg idx of the subreg that is used by the given instruction
+// operand, considering the given op_sel modifier.
+// Return 0 if the whole register is used or as a conservative fallback.
+static unsigned getEffectiveSubRegIdx(const SIRegisterInfo &TRI,
+                                      const SIInstrInfo &InstrInfo,
+                                      const MachineOperand &Op) {
+  const MachineInstr *I = Op.getParent();
+  if (!InstrInfo.isVOP3P(*I) || InstrInfo.isWMMA(*I) || InstrInfo.isSWMMAC(*I))
+    return 0;
+
+  const MachineOperand *OpMod =
+      getVOP3PSourceModifierFromOpIdx(I, Op.getOperandNo(), InstrInfo);
+  if (!OpMod)
+    return 0;
+
+  // Note: the FMA_MIX* and MAD_MIX* instructions have different semantics for
+  // the op_sel and op_sel_hi source modifiers:
+  // - op_sel: selects low/high operand bits as input to the operation;
+  //           has only meaning for 16-bit source operands
+  // - op_sel_hi: specifies the size of the source operands (16 or 32 bits);
+  //              a value of 0 indicates 32 bit, 1 indicates 16 bit
+  // For the other VOP3P instructions, the semantics are:
+  // - op_sel: selects low/high operand bits as input to the operation which
+  //           results in the lower-half of the destination
+  // - op_sel_hi: selects the low/high operand bits as input to the operation
+  //              which results in the higher-half of the destination
+  int64_t OpSel = OpMod->getImm() & SISrcMods::OP_SEL_0;
+  int64_t OpSelHi = OpMod->getImm() & SISrcMods::OP_SEL_1;
+
+  // Check if all parts of the register are being used (= op_sel and op_sel_hi
+  // differ for VOP3P or op_sel_hi=0 for VOP3PMix). In that case we can return
+  // early.
+  if ((!InstrInfo.isVOP3PMix(*I) && (!OpSel || !OpSelHi) &&
+       (OpSel || OpSelHi)) ||
+      (InstrInfo.isVOP3PMix(*I) && !OpSelHi))
+    return 0;
+
+  const MachineRegisterInfo &MRI = I->getParent()->getParent()->getRegInfo();
+  const TargetRegisterClass *RC = TRI.getRegClassForOperandReg(MRI, Op);
+
+  if (unsigned SubRegIdx = OpSel ? AMDGPU::sub1 : AMDGPU::sub0;
+      TRI.getSubClassWithSubReg(RC, SubRegIdx) == RC)
+    return SubRegIdx;
----------------
arsenm wrote:

Don't think this needs to enforce exact match? Not that it probably matters in practice 

https://github.com/llvm/llvm-project/pull/140255


More information about the llvm-commits mailing list