[PATCH] D130101: [AMDGPU] NFC. Assert that mask is full with VOPC DPP

Tue Jul 19 10:25:21 PDT 2022

Joe_Nash created this revision.
Joe_Nash added reviewers: foad, rampitec, vpykhtin.
Herald added subscribers: kosarev, jsilvanus, kerbowa, kbarton, hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, jvesely, nemanjai, kzhuravl, arsenm.
Herald added a project: All.
Joe_Nash requested review of this revision.
Herald added subscribers: llvm-commits, wdng.
Herald added a project: LLVM.

VOPC DPP should not be formed when the row_mask and bank_mask are not
0xf (full) because the resulting VOP DPP would have different semantics
than the MOV DPP followed by VOP. Existing checks in GCNDPPCombine cover
this case but for different reasons, so assert the property for
future-proofing.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D130101

Files:
  llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
  llvm/test/CodeGen/AMDGPU/vopc_dpp.mir


Index: llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
===================================================================

--- llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -67,3 +67,36 @@
     V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
 
 ...
+---
+
+name: mask_not_full
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: mask_not_full
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+    ; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    ; Do not combine VOPC when row_mask or bank_mask is not 0xf
+    ; All cases are covered by generic rules for creating DPP instructions
+    %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+    V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+
+    %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+    %6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
+
+...
Index: llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -202,6 +202,18 @@
     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
     return nullptr;
   }
+  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
+  // Prior checks cover Mask with VOPC condition, but not on purpose
+  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
+  assert(RowMaskOpnd && RowMaskOpnd->isImm());
+  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
+  assert(BankMaskOpnd && BankMaskOpnd->isImm());
+  const bool MaskAllLanes =
+      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
+  assert(MaskAllLanes ||
+         !(TII->isVOPC(DPPOp) ||
+           (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
+             "VOPC cannot form DPP unless mask is full");
 
   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
                          OrigMI.getDebugLoc(), TII->get(DPPOp))
@@ -222,7 +234,6 @@
       // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
     }
 
-    int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
     if (OldIdx != -1) {
       assert(OldIdx == NumOperands);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D130101.445867.patch
Type: text/x-patch
Size: 3265 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220719/b47539ff/attachment.bin>