[llvm] dc850fb - [AMDGPU] NFC. Assert that mask is full with VOPC DPP

Joe Nash via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 20 10:53:13 PDT 2022


Author: Joe Nash
Date: 2022-07-20T13:23:03-04:00
New Revision: dc850fbf3ba9e3fa6f805c3e58dcb87619ad4fdf

URL: https://github.com/llvm/llvm-project/commit/dc850fbf3ba9e3fa6f805c3e58dcb87619ad4fdf
DIFF: https://github.com/llvm/llvm-project/commit/dc850fbf3ba9e3fa6f805c3e58dcb87619ad4fdf.diff

LOG: [AMDGPU] NFC. Assert that mask is full with VOPC DPP

VOPC DPP should not be formed unless both row_mask and bank_mask are
0xf (full), because the resulting VOP DPP would have different semantics
than the MOV DPP followed by VOP. Existing checks in GCNDPPCombine cover
this case but for different reasons, so assert the property for
future-proofing.

Reviewed By: nhaehnle

Differential Revision: https://reviews.llvm.org/D130101

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
    llvm/test/CodeGen/AMDGPU/vopc_dpp.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
index a2f37768f5c6c..99bef5416d9c3 100644
--- a/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNDPPCombine.cpp
@@ -202,6 +202,18 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
     return nullptr;
   }
+  int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
+  // Prior checks cover Mask with VOPC condition, but not on purpose
+  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
+  assert(RowMaskOpnd && RowMaskOpnd->isImm());
+  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
+  assert(BankMaskOpnd && BankMaskOpnd->isImm());
+  const bool MaskAllLanes =
+      RowMaskOpnd->getImm() == 0xF && BankMaskOpnd->getImm() == 0xF;
+  assert(MaskAllLanes ||
+         !(TII->isVOPC(DPPOp) ||
+           (TII->isVOP3(DPPOp) && OrigOpE32 != -1 && TII->isVOPC(OrigOpE32))) &&
+             "VOPC cannot form DPP unless mask is full");
 
   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
                          OrigMI.getDebugLoc(), TII->get(DPPOp))
@@ -222,7 +234,6 @@ MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
       // If we shrunk a 64bit vop3b to 32bits, just ignore the sdst
     }
 
-    int OrigOpE32 = AMDGPU::getVOPe32(OrigOp);
     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
     if (OldIdx != -1) {
       assert(OldIdx == NumOperands);

diff  --git a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
index 3bea21c5f1954..450951e9d11bf 100644
--- a/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
+++ b/llvm/test/CodeGen/AMDGPU/vopc_dpp.mir
@@ -67,3 +67,36 @@ body:             |
     V_CMP_LT_I32_e32 %0, %18, implicit-def $vcc, implicit $exec
 
 ...
+---
+
+name: mask_not_full
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; GCN-LABEL: name: mask_not_full
+    ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; GCN-NEXT: {{  $}}
+    ; GCN-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+    ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[DEF]], [[COPY1]], 1, 15, 14, 1, implicit $exec
+    ; GCN-NEXT: V_CMP_CLASS_F16_e32 [[V_MOV_B32_dpp]], [[COPY]], implicit-def $vcc, implicit $mode, implicit $exec
+    ; GCN-NEXT: [[V_MOV_B32_dpp1:%[0-9]+]]:vgpr_32 = V_MOV_B32_dpp [[V_MOV_B32_e32_]], [[COPY1]], 1, 13, 15, 1, implicit $exec
+    ; GCN-NEXT: [[V_CMP_GE_F16_e64_:%[0-9]+]]:sgpr_32 = V_CMP_GE_F16_e64 1, [[V_MOV_B32_dpp1]], 0, [[COPY]], 1, implicit $mode, implicit $exec
+    %0:vgpr_32 = COPY $vgpr0
+    %1:vgpr_32 = COPY $vgpr1
+    %2:vgpr_32 = IMPLICIT_DEF
+    %3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+
+    ; Do not combine VOPC when row_mask or bank_mask is not 0xf
+    ; All cases are covered by generic rules for creating DPP instructions
+    %4:vgpr_32 = V_MOV_B32_dpp %2, %1, 1, 15, 14, 1, implicit $exec
+    V_CMP_CLASS_F16_e32 %4, %0, implicit-def $vcc, implicit $mode, implicit $exec
+
+    %5:vgpr_32 = V_MOV_B32_dpp %3, %1, 1, 13, 15, 1, implicit $exec
+    %6:sgpr_32 = V_CMP_GE_F16_e64 1, %5, 0, %0, 1, implicit $mode, implicit $exec
+
+...


        


More information about the llvm-commits mailing list