[llvm] 87c46a3 - AMDGPU: Don't error on ds.ordered intrinsic in function

Fri Jan 24 13:07:04 PST 2020

Author: Matt Arsenault
Date: 2020-01-24T13:06:44-08:00
New Revision: 87c46a312975b193bf056b992ae6d778e528026b

URL: https://github.com/llvm/llvm-project/commit/87c46a312975b193bf056b992ae6d778e528026b
DIFF: https://github.com/llvm/llvm-project/commit/87c46a312975b193bf056b992ae6d778e528026b.diff

LOG: AMDGPU: Don't error on ds.ordered intrinsic in function

These should be assumed to be called from a compute context. Also
don't use a 2 entry switch over constants.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 84f26cccd7d0..cb03b908e610 100644

--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6081,6 +6081,28 @@ static unsigned getBufferOffsetForMMO(SDValue VOffset,
          cast<ConstantSDNode>(Offset)->getSExtValue();
 }
 
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) {
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_PS:
+    return 1;
+  case CallingConv::AMDGPU_VS:
+    return 2;
+  case CallingConv::AMDGPU_GS:
+    return 3;
+  case CallingConv::AMDGPU_HS:
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_ES:
+    report_fatal_error("ds_ordered_count unsupported for this calling conv");
+  case CallingConv::AMDGPU_CS:
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  default:
+    // Assume other calling conventions are various compute callable functions
+    return 0;
+  }
+}
+
 SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                  SelectionDAG &DAG) const {
   unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -6096,8 +6118,6 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     unsigned IndexOperand = M->getConstantOperandVal(7);
     unsigned WaveRelease = M->getConstantOperandVal(8);
     unsigned WaveDone = M->getConstantOperandVal(9);
-    unsigned ShaderType;
-    unsigned Instruction;
 
     unsigned OrderedCountIndex = IndexOperand & 0x3f;
     IndexOperand &= ~0x3f;
@@ -6116,36 +6136,11 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     if (IndexOperand)
       report_fatal_error("ds_ordered_count: bad index operand");
 
-    switch (IntrID) {
-    case Intrinsic::amdgcn_ds_ordered_add:
-      Instruction = 0;
-      break;
-    case Intrinsic::amdgcn_ds_ordered_swap:
-      Instruction = 1;
-      break;
-    }
-
     if (WaveDone && !WaveRelease)
       report_fatal_error("ds_ordered_count: wave_done requires wave_release");
 
-    switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
-    case CallingConv::AMDGPU_CS:
-    case CallingConv::AMDGPU_KERNEL:
-      ShaderType = 0;
-      break;
-    case CallingConv::AMDGPU_PS:
-      ShaderType = 1;
-      break;
-    case CallingConv::AMDGPU_VS:
-      ShaderType = 2;
-      break;
-    case CallingConv::AMDGPU_GS:
-      ShaderType = 3;
-      break;
-    default:
-      report_fatal_error("ds_ordered_count unsupported for this calling conv");
-    }
-
+    unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1;
+    unsigned ShaderType = getDSShaderTypeValue(DAG.getMachineFunction());
     unsigned Offset0 = OrderedCountIndex << 2;
     unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
                        (Instruction << 4);

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
index ad489debc46c..4009c5a63449 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
@@ -57,6 +57,42 @@ define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
   ret float %r
 }
 
+; FUNC-LABEL: {{^}}ds_ordered_add_default_cc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_default_cc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_fastcc:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define fastcc float @ds_ordered_add_fastcc() {
+  %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
+; FUNC-LABEL: {{^}}ds_ordered_add_func:
+; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
+; GCN: s_mov_b32 m0, 0{{$}}
+; VIGFX9-NEXT: s_nop 0
+; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
+; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
+define float @ds_ordered_add_func() {
+  %val = call i32 at llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* null, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
+  %r = bitcast i32 %val to float
+  ret float %r
+}
+
 ; FUNC-LABEL: {{^}}ds_ordered_add_ps:
 ; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
 ; GCN: s_mov_b32 m0, s0