[clang] 23c6d15 - [amdgpu] Add `llvm.amdgcn.endpgm` support.

Thu Nov 5 16:07:00 PST 2020

Author: Michael Liao
Date: 2020-11-05T19:06:50-05:00
New Revision: 23c6d1501d80073784cab367d30d50419ffa5706

URL: https://github.com/llvm/llvm-project/commit/23c6d1501d80073784cab367d30d50419ffa5706
DIFF: https://github.com/llvm/llvm-project/commit/23c6d1501d80073784cab367d30d50419ffa5706.diff

LOG: [amdgpu] Add `llvm.amdgcn.endpgm` support.

- `llvm.amdgcn.endpgm` is added to enable "abort" support.

Differential Revision: https://reviews.llvm.org/D90809

Added: 
    llvm/test/CodeGen/AMDGPU/amd.endpgm.ll

Modified: 
    clang/include/clang/Basic/BuiltinsAMDGPU.def
    clang/test/CodeGenCUDA/builtins-amdgcn.cu
    llvm/include/llvm/IR/IntrinsicsAMDGPU.td
    llvm/lib/Target/AMDGPU/SOPInstructions.td

Removed: 
    


################################################################################
diff  --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index f5901e6f8f3b..123a7ad212da 100644

--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -214,6 +214,8 @@ BUILTIN(__builtin_amdgcn_read_exec, "LUi", "nc")
 BUILTIN(__builtin_amdgcn_read_exec_lo, "Ui", "nc")
 BUILTIN(__builtin_amdgcn_read_exec_hi, "Ui", "nc")
 
+BUILTIN(__builtin_amdgcn_endpgm, "v", "nr")
+
 //===----------------------------------------------------------------------===//
 // R600-NI only builtins.
 //===----------------------------------------------------------------------===//

diff  --git a/clang/test/CodeGenCUDA/builtins-amdgcn.cu b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
index 1c3a79064595..8f0d0d0801bd 100644
--- a/clang/test/CodeGenCUDA/builtins-amdgcn.cu
+++ b/clang/test/CodeGenCUDA/builtins-amdgcn.cu
@@ -16,3 +16,9 @@ void test_ds_fmax(float src) {
   __shared__ float shared;
   volatile float x = __builtin_amdgcn_ds_fmaxf(&shared, src, 0, 0, false);
 }
+
+// CHECK-LABEL: @_Z6endpgmv(
+// CHECK: call void @llvm.amdgcn.endpgm()
+__global__ void endpgm() {
+  __builtin_amdgcn_endpgm();
+}

diff  --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 304377ce28ab..bc04fa40f2a8 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1577,6 +1577,10 @@ def int_amdgcn_wqm_vote : Intrinsic<[llvm_i1_ty],
 // FIXME: Should this be IntrNoMem, IntrHasSideEffects, or IntrWillReturn?
 def int_amdgcn_kill : Intrinsic<[], [llvm_i1_ty], []>;
 
+def int_amdgcn_endpgm : GCCBuiltin<"__builtin_amdgcn_endpgm">,
+  Intrinsic<[], [], [IntrNoReturn, IntrCold, IntrNoMem, IntrHasSideEffects]
+>;
+
 // Copies the active channels of the source value to the destination value,
 // with the guarantee that the source value is computed as if the entire
 // program were executed in Whole Wavefront Mode, i.e. with all channels

diff  --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 08966d7d62eb..00527171ff11 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1118,6 +1118,7 @@ let isTerminator = 1 in {
 def S_ENDPGM : SOPP_Pseudo<"s_endpgm", (ins EndpgmImm:$simm16), "$simm16"> {
   let isBarrier = 1;
   let isReturn = 1;
+  let hasSideEffects = 1;
 }
 
 def S_ENDPGM_SAVED : SOPP_Pseudo<"s_endpgm_saved", (ins)> {
@@ -1328,6 +1329,11 @@ def : GCNPat <
     (S_ENDPGM (i16 0))
 >;
 
+def : GCNPat <
+  (int_amdgcn_endpgm),
+    (S_ENDPGM (i16 0))
+>;
+
 def : GCNPat <
   (i64 (ctpop i64:$src)),
     (i64 (REG_SEQUENCE SReg_64,

diff  --git a/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
new file mode 100644
index 000000000000..ac9cd0699118
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
+
+define amdgpu_kernel void @test0() {
+; CHECK-LABEL: test0:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_endpgm
+  tail call void @llvm.amdgcn.endpgm()
+  unreachable
+}
+
+define void @test1() {
+; CHECK-LABEL: test1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    s_endpgm
+  tail call void @llvm.amdgcn.endpgm()
+  unreachable
+}
+
+define amdgpu_kernel void @test2(i32* %p, i32 %x) {
+; CHECK-LABEL: test2:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_load_dword s2, s[0:1], 0x2c
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_cmp_lt_i32 s2, 1
+; CHECK-NEXT:    s_cbranch_scc0 BB2_2
+; CHECK-NEXT:  ; %bb.1: ; %else
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; CHECK-NEXT:    v_mov_b32_e32 v2, s2
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, s1
+; CHECK-NEXT:    flat_store_dword v[0:1], v2
+; CHECK-NEXT:    s_endpgm
+; CHECK-NEXT:  BB2_2: ; %then
+; CHECK-NEXT:    s_endpgm
+  %cond = icmp sgt i32 %x, 0
+  br i1 %cond, label %then, label %else
+
+then:
+  tail call void @llvm.amdgcn.endpgm()
+  unreachable
+
+else:
+  store i32 %x, i32* %p
+  ret void
+}
+
+declare void @llvm.amdgcn.endpgm()