[llvm] [AMDGPU] Add llvm.amdgcn.set.inactive.chain.arg intrinsic (PR #71530)

via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 7 04:26:14 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Diana (rovka)

<details>
<summary>Changes</summary>

Add a new intrinsic, similar to llvm.amdgcn.set.inactive, but used only in functions with the `amdgpu_cs_chain` or `amdgpu_cs_chain_preserve` calling conventions. It allows setting the inactive lanes to those of a value received as a VGPR argument (whereas llvm.amdgcn.set.inactive usually takes a constant as the value of the inactive lanes).

Differential Revision: https://reviews.llvm.org/D158604

---
Full diff: https://github.com/llvm/llvm-project/pull/71530.diff


3 Files Affected:

- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+11) 
- (modified) llvm/lib/IR/Verifier.cpp (+24) 
- (modified) llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll (+60) 


``````````diff
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 1254499648fefff..f10bc7c75eb199b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2135,6 +2135,17 @@ def int_amdgcn_set_inactive :
              LLVMMatchType<0>], // value for the inactive lanes to take
             [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
+// Similar to int_amdgcn_set_inactive, but the value for the inactive lanes must
+// be a VGPR function argument.
+// Can only be used in functions with the `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
+// flow.
+def int_amdgcn_set_inactive_chain_arg :
+  Intrinsic<[llvm_anyint_ty],
+            [LLVMMatchType<0>, // value to be copied
+             LLVMMatchType<0>], // value for the inactive lanes to take
+            [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
 // Return if the given flat pointer points to a local memory address.
 def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">,
   DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 24dbd65d9abca23..9ef3a0d49b06b02 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5987,6 +5987,30 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
           "VGPR arguments must not have the `inreg` attribute", &Call);
     break;
   }
+  case Intrinsic::amdgcn_set_inactive_chain_arg: {
+    auto CallerCC = Call.getCaller()->getCallingConv();
+    switch (CallerCC) {
+    case CallingConv::AMDGPU_CS_Chain:
+    case CallingConv::AMDGPU_CS_ChainPreserve:
+      break;
+    default:
+      CheckFailed("Intrinsic can only be used from functions with the "
+                  "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+                  "calling conventions",
+                  &Call);
+      break;
+    }
+
+    unsigned InactiveIdx = 1;
+    Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
+          "Value for inactive lanes must not have the `inreg` attribute",
+          &Call);
+    Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
+          "Value for inactive lanes must be a function argument", &Call);
+    Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
+          "Value for inactive lanes must be a VGPR function argument", &Call);
+    break;
+  }
   case Intrinsic::experimental_convergence_entry:
     LLVM_FALLTHROUGH;
   case Intrinsic::experimental_convergence_anchor:
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
index 4a284ecc8238349..b9e6e1eb4590525 100644
--- a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
@@ -1,6 +1,7 @@
 ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
 
 declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
+declare i32 @llvm.amdgcn.set.inactive.chain.arg(i32, i32) convergent willreturn nofree nocallback readnone
 
 define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
   ; CHECK: immarg operand has non-immediate parameter
@@ -32,6 +33,10 @@ define amdgpu_cs_chain void @bad_exec(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr,
 }
 
 define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -39,6 +44,10 @@ define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, {
 }
 
 define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -46,6 +55,10 @@ define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32
 }
 
 define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -53,8 +66,55 @@ define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inre
 }
 
 define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
   ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
   ; CHECK-NEXT: @llvm.amdgcn.cs.chain
   call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
   unreachable
 }
+
+define amdgpu_cs void @bad_caller_amdgpu_cs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+  ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
+  ; Unlike llvm.amdgcn.set.inactive.chain.arg, llvm.amdgcn.cs.chain may be called from amdgpu_cs functions.
+
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_sgpr(ptr addrspace(1) %out, i32 %active, i32 inreg %inactive) {
+  ; CHECK: Value for inactive lanes must be a VGPR function argument
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_const(ptr addrspace(1) %out, i32 %active) {
+  ; CHECK: Value for inactive lanes must be a function argument
+  ; CHECK-NEXT: llvm.amdgcn.set.inactive.chain.arg
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 29) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_computed(ptr addrspace(1) %out, i32 %active) {
+  ; CHECK: Value for inactive lanes must be a function argument
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %inactive = add i32 %active, 127
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_inreg(ptr addrspace(1) %out, i32 %active, i32 %inactive) {
+  ; CHECK: Value for inactive lanes must not have the `inreg` attribute
+  ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+  %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 inreg %inactive) #0
+  store i32 %tmp, ptr addrspace(1) %out
+  ret void
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/71530


More information about the llvm-commits mailing list