[llvm] f59f116 - AMDGPU: Add __builtin_amdgcn_permlane64
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 13 21:12:24 PDT 2022
Author: Matt Arsenault
Date: 2022-10-13T21:12:11-07:00
New Revision: f59f116bd5c357b1cb4b04693c88d41484e168d5
URL: https://github.com/llvm/llvm-project/commit/f59f116bd5c357b1cb4b04693c88d41484e168d5
DIFF: https://github.com/llvm/llvm-project/commit/f59f116bd5c357b1cb4b04693c88d41484e168d5.diff
LOG: AMDGPU: Add __builtin_amdgcn_permlane64
Added:
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index d8f36f5c36021..d4d16d5a9563d 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -259,6 +259,9 @@ TARGET_BUILTIN(__builtin_amdgcn_image_bvh_intersect_ray_lh, "V4UiWUifV4fV4hV4hV4
// GFX11+ only builtins.
//===----------------------------------------------------------------------===//
+// TODO: This is a no-op in wave32. Should the builtin require wavefrontsize64?
+TARGET_BUILTIN(__builtin_amdgcn_permlane64, "UiUi", "nc", "gfx11-insts")
+
//===----------------------------------------------------------------------===//
// WMMA builtins.
// Postfix w32 indicates the builtin requires wavefront size of 32.
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
index 4d64d7c9b0fb4..a4f2d610afa83 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx11.cl
@@ -31,3 +31,9 @@ void test_ds_bvh_stack_rtn(global uint2* out, uint addr, uint data, uint4 data1)
{
*out = __builtin_amdgcn_ds_bvh_stack_rtn(addr, data, data1, 128);
}
+
+// CHECK-LABEL: @test_permlane64(
+// CHECK: call i32 @llvm.amdgcn.permlane64(i32 %a)
+void test_permlane64(global uint* out, uint a) {
+ *out = __builtin_amdgcn_permlane64(a);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
index adfd2369cc121..823d30262c648 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx11.cl
@@ -13,4 +13,6 @@ void test(global uint* out1, global ulong* out2, int x) {
#if __has_builtin(__builtin_amdgcn_s_sendmsg_rtnl)
*out2 = __builtin_amdgcn_s_sendmsg_rtnl(x); // GFX11-error {{argument to '__builtin_amdgcn_s_sendmsg_rtnl' must be a constant integer}}
#endif
+
+ *out1 = __builtin_amdgcn_permlane64(x); // GFX10-error {{'__builtin_amdgcn_permlane64' needs target feature gfx11-insts}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index a90c44ba19957..2c63d2e77e5e5 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1991,6 +1991,7 @@ def int_amdgcn_image_bvh_intersect_ray :
// llvm.amdgcn.permlane64 <src0>
def int_amdgcn_permlane64 :
+ ClangBuiltin<"__builtin_amdgcn_permlane64">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty],
[IntrNoMem, IntrConvergent, IntrWillReturn]>;
More information about the llvm-commits mailing list