[llvm] [AMDGPU][CodeGen] support v_mov_b16 and v_swap_b16 in true16 format (PR #102198)

Tue Aug 6 11:50:34 PDT 2024

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. :warning:

<details>
<summary>
You can test this locally with the following command:
</summary>

``````````bash
git-clang-format --diff 5e326983b620507940816f4c30ab4d80fa6250ad bc76204a00a5099f59149d02b86bd0ae168f8b23 --extensions cpp -- llvm/lib/Target/AMDGPU/SIFoldOperands.cpp llvm/lib/Target/AMDGPU/SIInstrInfo.cpp llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
``````````

</details>

<details>
<summary>
View the diff from clang-format here.
</summary>

``````````diff

diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index a2352fd892..875738dad7 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -1462,12 +1462,12 @@ bool SIFoldOperands::tryFoldFoldableCopy(
 
   MachineOperand *OpToFoldPtr;
   if (MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e64) {
-	  // Folding when any src_modifiers are non-zero is unsupported
-	  if (TII->hasAnyModifiersSet(MI))
-		  return false;
-	  OpToFoldPtr = &MI.getOperand(2);
+    // Folding when any src_modifiers are non-zero is unsupported
+    if (TII->hasAnyModifiersSet(MI))
+      return false;
+    OpToFoldPtr = &MI.getOperand(2);
   } else
-	  OpToFoldPtr = &MI.getOperand(1);
+    OpToFoldPtr = &MI.getOperand(1);
   MachineOperand &OpToFold = *OpToFoldPtr;
   bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
 
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 438d380d85..6f8874daa9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5638,8 +5638,8 @@ void SIInstrInfo::legalizeOpWithMove(MachineInstr &MI, unsigned OpIdx) const {
   const TargetRegisterClass *RC = RI.getRegClass(RCID);
   unsigned Size = RI.getRegSizeInBits(*RC);
   unsigned Opcode = (Size == 64) ? AMDGPU::V_MOV_B64_PSEUDO
-	                : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
-					               : AMDGPU::V_MOV_B32_e32;
+                    : Size == 16 ? AMDGPU::V_MOV_B16_t16_e64
+                                 : AMDGPU::V_MOV_B32_e32;
   if (MO.isReg())
     Opcode = AMDGPU::COPY;
   else if (RI.isSGPRClass(RC))
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 9369c8685f..1557475514 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -692,8 +692,7 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
     if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
          MovY->getOpcode() != AMDGPU::V_MOV_B16_t16_e32 &&
          MovY->getOpcode() != AMDGPU::COPY) ||
-        !MovY->getOperand(1).isReg()        ||
-        MovY->getOperand(1).getReg() != T   ||
+        !MovY->getOperand(1).isReg() || MovY->getOperand(1).getReg() != T ||
         MovY->getOperand(1).getSubReg() != Tsub)
       continue;
 
@@ -740,13 +739,16 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
 
     LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
 
-	MachineBasicBlock &MBB = *MovT.getParent();
-    SmallVector<MachineInstr*, 4> Swaps;
+    MachineBasicBlock &MBB = *MovT.getParent();
+    SmallVector<MachineInstr *, 4> Swaps;
     if (Size == 2) {
       auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                          TII->get(AMDGPU::V_SWAP_B16))
-        .addDef(X).addDef(Y)
-        .addReg(Y).addReg(X).getInstr();
+                     .addDef(X)
+                     .addDef(Y)
+                     .addReg(Y)
+                     .addReg(X)
+                     .getInstr();
       Swaps.push_back(MIB);
     } else {
       assert(Size > 0 && Size % 4 == 0);
@@ -756,10 +758,11 @@ MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
         Y1 = getSubRegForIndex(Y, Ysub, I);
         auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
                            TII->get(AMDGPU::V_SWAP_B32))
-          .addDef(X1.Reg, 0, X1.SubReg)
-          .addDef(Y1.Reg, 0, Y1.SubReg)
-          .addReg(Y1.Reg, 0, Y1.SubReg)
-          .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+                       .addDef(X1.Reg, 0, X1.SubReg)
+                       .addDef(Y1.Reg, 0, Y1.SubReg)
+                       .addReg(Y1.Reg, 0, Y1.SubReg)
+                       .addReg(X1.Reg, 0, X1.SubReg)
+                       .getInstr();
         Swaps.push_back(MIB);
       }
     }

``````````

</details>


https://github.com/llvm/llvm-project/pull/102198