[llvm] [AMDGPU] Add llvm.amdgcn.set.inactive.chain.arg intrinsic (PR #71530)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 7 04:25:43 PST 2023
https://github.com/rovka created https://github.com/llvm/llvm-project/pull/71530
Add a new intrinsic, similar to llvm.amdgcn.set.inactive, but used only in functions with the `amdgpu_cs_chain` or `amdgpu_cs_chain_preserve` calling conventions. It allows setting the inactive lanes to those of a value received as a VGPR argument (whereas llvm.amdgcn.set.inactive usually takes a constant as the value of the inactive lanes).
Differential Revision: https://reviews.llvm.org/D158604
From 040a6488f0b313a755ba41310b0fe6daa01da692 Mon Sep 17 00:00:00 2001
From: Diana Picus <Diana-Magda.Picus at amd.com>
Date: Tue, 7 Nov 2023 13:11:19 +0100
Subject: [PATCH] [AMDGPU] Add llvm.amdgcn.set.inactive.chain.arg intrinsic
Add a new intrinsic, similar to llvm.amdgcn.set.inactive, but used only
in functions with the `amdgpu_cs_chain` or `amdgpu_cs_chain_preserve`
calling conventions. It allows setting the inactive lanes to those of
a value received as a VGPR argument (whereas llvm.amdgcn.set.inactive
usually takes a constant as the value of the inactive lanes).
Differential Revision: https://reviews.llvm.org/D158604
---
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 11 ++++
llvm/lib/IR/Verifier.cpp | 24 ++++++++
.../AMDGPU/intrinsic-amdgpu-cs-chain.ll | 60 +++++++++++++++++++
3 files changed, 95 insertions(+)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 1254499648fefff..f10bc7c75eb199b 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -2135,6 +2135,17 @@ def int_amdgcn_set_inactive :
LLVMMatchType<0>], // value for the inactive lanes to take
[IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+// Similar to int_amdgcn_set_inactive, but the value for the inactive lanes must
+// be a VGPR function argument (i.e. an argument not marked `inreg`).
+// Can only be used in functions with the `amdgpu_cs_chain` or
+// `amdgpu_cs_chain_preserve` calling conventions, and only in uniform control
+// flow.
+def int_amdgcn_set_inactive_chain_arg :
+ Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, // value to be copied
+ LLVMMatchType<0>], // value for the inactive lanes to take
+ [IntrNoMem, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+
// Return if the given flat pointer points to a local memory address.
def int_amdgcn_is_shared : ClangBuiltin<"__builtin_amdgcn_is_shared">,
DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_ptr_ty],
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 24dbd65d9abca23..9ef3a0d49b06b02 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -5987,6 +5987,30 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"VGPR arguments must not have the `inreg` attribute", &Call);
break;
}
+ case Intrinsic::amdgcn_set_inactive_chain_arg: {
+ auto CallerCC = Call.getCaller()->getCallingConv();
+ switch (CallerCC) {
+ case CallingConv::AMDGPU_CS_Chain:
+ case CallingConv::AMDGPU_CS_ChainPreserve:
+ break;
+ default:
+ CheckFailed("Intrinsic can only be used from functions with the "
+ "amdgpu_cs_chain or amdgpu_cs_chain_preserve "
+ "calling conventions",
+ &Call);
+ break;
+ }
+
+ unsigned InactiveIdx = 1;
+ Check(!Call.paramHasAttr(InactiveIdx, Attribute::InReg),
+ "Value for inactive lanes must not have the `inreg` attribute",
+ &Call);
+ Check(isa<Argument>(Call.getArgOperand(InactiveIdx)),
+ "Value for inactive lanes must be a function argument", &Call);
+ Check(!cast<Argument>(Call.getArgOperand(InactiveIdx))->hasInRegAttr(),
+ "Value for inactive lanes must be a VGPR function argument", &Call);
+ break;
+ }
case Intrinsic::experimental_convergence_entry:
LLVM_FALLTHROUGH;
case Intrinsic::experimental_convergence_anchor:
diff --git a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
index 4a284ecc8238349..b9e6e1eb4590525 100644
--- a/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
+++ b/llvm/test/Verifier/AMDGPU/intrinsic-amdgpu-cs-chain.ll
@@ -1,6 +1,7 @@
; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s
declare void @llvm.amdgcn.cs.chain(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) noreturn
+declare i32 @llvm.amdgcn.set.inactive.chain.arg(i32, i32) convergent willreturn nofree nocallback readnone
define amdgpu_cs_chain void @bad_flags(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr, i32 %flags) {
; CHECK: immarg operand has non-immediate parameter
@@ -32,6 +33,10 @@ define amdgpu_cs_chain void @bad_exec(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr,
}
define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+ ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -39,6 +44,10 @@ define void @bad_caller_default_cc(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, {
}
define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+ ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -46,6 +55,10 @@ define amdgpu_kernel void @bad_caller_amdgpu_kernel(ptr %fn, i32 %exec, <4 x i32
}
define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+ ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
@@ -53,8 +66,55 @@ define amdgpu_gfx void @bad_caller_amdgpu_gfx(ptr %fn, i32 %exec, <4 x i32> inre
}
define amdgpu_vs void @bad_caller_amdgpu_vs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+ ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
; CHECK: Intrinsic can only be used from functions with the amdgpu_cs, amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
; CHECK-NEXT: @llvm.amdgcn.cs.chain
call void(ptr, i32, <4 x i32>, { ptr, <3 x i32> }, i32, ...) @llvm.amdgcn.cs.chain(ptr %fn, i32 %exec, <4 x i32> %sgpr, { ptr, <3 x i32> } %vgpr, i32 0)
unreachable
}
+
+define amdgpu_cs void @bad_caller_amdgpu_cs(ptr %fn, i32 %exec, <4 x i32> inreg %sgpr, { ptr, <3 x i32> } %vgpr) {
+ ; CHECK: Intrinsic can only be used from functions with the amdgpu_cs_chain or amdgpu_cs_chain_preserve calling conventions
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %unused = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 0, i32 1)
+
+ ; Unlike llvm.amdgcn.set.inactive.chain.arg, llvm.amdgcn.cs.chain may be called from amdgpu_cs functions.
+
+ ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_sgpr(ptr addrspace(1) %out, i32 %active, i32 inreg %inactive) {
+ ; CHECK: Value for inactive lanes must be a VGPR function argument
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
+ store i32 %tmp, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_const(ptr addrspace(1) %out, i32 %active) {
+ ; CHECK: Value for inactive lanes must be a function argument
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 29) #0
+ store i32 %tmp, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_computed(ptr addrspace(1) %out, i32 %active) {
+ ; CHECK: Value for inactive lanes must be a function argument
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %inactive = add i32 %active, 127
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 %inactive) #0
+ store i32 %tmp, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_cs_chain void @set_inactive_chain_arg_inreg(ptr addrspace(1) %out, i32 %active, i32 %inactive) {
+ ; CHECK: Value for inactive lanes must not have the `inreg` attribute
+ ; CHECK-NEXT: @llvm.amdgcn.set.inactive.chain.arg
+ %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg(i32 %active, i32 inreg %inactive) #0
+ store i32 %tmp, ptr addrspace(1) %out
+ ret void
+}
More information about the llvm-commits
mailing list