[llvm] [AMDGPU] Add commute for some VOP3 inst (PR #121326)

Fri Jan 3 06:03:34 PST 2025

================
@@ -2798,7 +2798,8 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   } else if (!Src0.isReg() && Src1.isReg()) {
     if (isOperandLegal(MI, Src1Idx, &Src0))
       CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
-  } else if (isInlineConstant(Src0) && isInlineConstant(Src1)) {
+  } else if (isInlineConstant(Src1)) {
+    // If Src1 is inline constant and Src0 is not, then isOperandLegal rejects
----------------
Shoreshen wrote:

> Or rather, one that takes the full set of operands that need to be considered for the result instruction

Hi @arsenm , I tried to check the swapped instruction with isOperandLegal, it fails 100+ cases. The following is the change:

```cpp
MachineInstr* SIInstrInfo::swapOperands(MachineInstr &MI, bool NewMI, 
                                        unsigned Src0Idx,
                                        unsigned Src1Idx,
                                        MachineOperand &Src0,
                                        MachineOperand &Src1) const {
  MachineInstr *CommutedMI = nullptr;

  if (Src0.isReg() && Src1.isReg()) {
    // Be sure to copy the source modifiers to the right place.
    CommutedMI
        = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
  } else if (Src0.isReg() && !Src1.isReg()) {
    CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
  } else if (!Src0.isReg() && Src1.isReg()) {
    CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
  } else if (Src0.isImm() && Src1.isImm()) {
    CommutedMI = swapImmOperands(MI, Src0, Src1);
  }

  return CommutedMI;
}

MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
                                                  unsigned Src0Idx,
                                                  unsigned Src1Idx) const {
  assert(!NewMI && "this should never be used");

  unsigned Opc = MI.getOpcode();
  int CommutedOpcode = commuteOpcode(Opc);
  if (CommutedOpcode == -1)
    return nullptr;

  if (Src0Idx > Src1Idx)
    std::swap(Src0Idx, Src1Idx);

  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
           static_cast<int>(Src0Idx) &&
         AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
           static_cast<int>(Src1Idx) &&
         "inconsistency with findCommutedOpIndices");

  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  MachineOperand &Src1 = MI.getOperand(Src1Idx);
  MachineInstr *CommutedMI = swapOperands(MI, NewMI, Src0Idx, Src1Idx, Src0, Src1);
  if (!CommutedMI)
    return nullptr;
  if (!isOperandLegal(*CommutedMI, Src1Idx, &CommutedMI->getOperand(Src1Idx))) {
    // swap back if failed check
    swapOperands(MI, NewMI, Src0Idx, Src1Idx, Src0, Src1);
    return nullptr;
  }

  if (CommutedMI) {
    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
                        Src1, AMDGPU::OpName::src1_modifiers);

    swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_sel, Src1,
                        AMDGPU::OpName::src1_sel);

    CommutedMI->setDesc(get(CommutedOpcode));
  }

  return CommutedMI;
}
```

The reason for that is the VALU instructions with literal in the first input operands. 

The `commuteInstruction` function was called during the shrink instruction pass, and mismatched OpIdx and MO (first operand) will be the parameter of isOperandLegal. It return false because MO was counted for 2 times of literal limit (explained in the other reply).

Now if using the swapped instruction, the mismatch no longer exists, then it will move all literal constant to the second input operand for all VALU instructions.

I think maybe open another issue to handle it since it may cost further effort to fix all the cases



https://github.com/llvm/llvm-project/pull/121326