[clang] [Clang][AMDGPU] Add builtins for instrinsic `llvm.amdgcn.raw.ptr.buffer.load` (PR #99258)
Shilei Tian via cfe-commits
cfe-commits at lists.llvm.org
Thu Jul 18 11:50:45 PDT 2024
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/99258
>From 578d497caf7b0cc8d10779055dd4f220323ac9a1 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Tue, 16 Jul 2024 18:34:35 -0400
Subject: [PATCH] [Clang][AMDGPU] Add builtins for instrinsic
`llvm.amdgcn.raw.ptr.buffer.load`
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 6 +
clang/lib/CodeGen/CGBuiltin.cpp | 33 ++++
.../builtins-amdgcn-raw-buffer-load.cl | 172 ++++++++++++++++++
.../builtins-amdgcn-raw-buffer-load-error.cl | 33 ++++
4 files changed, 244 insertions(+)
create mode 100644 clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e62315eea277a..774cbaa74f8af 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -155,6 +155,12 @@ BUILTIN(__builtin_amdgcn_raw_buffer_store_b32, "viQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b64, "vV2iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b96, "vV3iQbiiIi", "n")
BUILTIN(__builtin_amdgcn_raw_buffer_store_b128, "vV4iQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b8, "UcQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b16, "UsQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b32, "UiQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b64, "V2UiQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b96, "V3UiQbiiIi", "n")
+BUILTIN(__builtin_amdgcn_raw_buffer_load_b128, "V4UiQbiiIi", "n")
//===----------------------------------------------------------------------===//
// Ballot builtins.
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2ad62d6ee0bb2..f426570b17e15 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -19185,6 +19185,39 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_raw_buffer_store_b128:
return emitBuiltinWithOneOverloadedType<5>(
*this, E, Intrinsic::amdgcn_raw_ptr_buffer_store);
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128: {
+ llvm::Type *RetTy = nullptr;
+ switch (BuiltinID) {
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b8:
+ RetTy = Int8Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b16:
+ RetTy = Int16Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b32:
+ RetTy = Int32Ty;
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b64:
+ RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/2);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b96:
+ RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/3);
+ break;
+ case AMDGPU::BI__builtin_amdgcn_raw_buffer_load_b128:
+ RetTy = llvm::FixedVectorType::get(Int32Ty, /*NumElements=*/4);
+ break;
+ }
+ Function *F =
+ CGM.getIntrinsic(Intrinsic::amdgcn_raw_ptr_buffer_load, RetTy);
+ return Builder.CreateCall(
+ F, {EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)),
+ EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3))});
+ }
default:
return nullptr;
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
new file mode 100644
index 0000000000000..482e05dd838f6
--- /dev/null
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-raw-buffer-load.cl
@@ -0,0 +1,172 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -emit-llvm -o - %s | FileCheck %s
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned int u64 __attribute__((ext_vector_type(2)));
+typedef unsigned int u96 __attribute__((ext_vector_type(3)));
+typedef unsigned int u128 __attribute__((ext_vector_type(4)));
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+u8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+u64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+u96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+u128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+u64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+u96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 [[OFFSET:%.*]], i32 0, i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+u128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_offset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, offset, /*soffset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i8 [[TMP0]]
+//
+u8 test_amdgcn_raw_ptr_buffer_load_b8_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i16 [[TMP0]]
+//
+u16 test_amdgcn_raw_ptr_buffer_load_b16_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret i32 [[TMP0]]
+//
+u32 test_amdgcn_raw_ptr_buffer_load_b32_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <2 x i32> [[TMP0]]
+//
+u64 test_amdgcn_raw_ptr_buffer_load_b64_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <3 x i32> [[TMP0]]
+//
+u96 test_amdgcn_raw_ptr_buffer_load_b96_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) [[RSRC:%.*]], i32 0, i32 [[SOFFSET:%.*]], i32 0)
+// CHECK-NEXT: ret <4 x i32> [[TMP0]]
+//
+u128 test_amdgcn_raw_ptr_buffer_load_b128_non_const_soffset(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, soffset, /*aux=*/0);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
new file mode 100644
index 0000000000000..1929da8c565d3
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-raw-buffer-load-error.cl
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu verde -S -verify -o - %s
+// REQUIRES: amdgpu-registered-target
+
+typedef unsigned char u8;
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned int u64 __attribute__((ext_vector_type(2)));
+typedef unsigned int u96 __attribute__((ext_vector_type(3)));
+typedef unsigned int u128 __attribute__((ext_vector_type(4)));
+
+u8 test_amdgcn_raw_ptr_buffer_load_b8(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b8(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b8' must be a constant integer}}
+}
+
+u16 test_amdgcn_raw_ptr_buffer_load_b16(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b16(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b16' must be a constant integer}}
+}
+
+u32 test_amdgcn_raw_ptr_buffer_load_b32(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b32(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b32' must be a constant integer}}
+}
+
+u64 test_amdgcn_raw_ptr_buffer_load_b64(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b64(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b64' must be a constant integer}}
+}
+
+u96 test_amdgcn_raw_ptr_buffer_load_b96(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b96(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b96' must be a constant integer}}
+}
+
+u128 test_amdgcn_raw_ptr_buffer_load_b128(__amdgpu_buffer_rsrc_t rsrc, int offset, int soffset, int aux) {
+ return __builtin_amdgcn_raw_buffer_load_b128(rsrc, /*offset=*/0, /*soffset=*/0, aux); //expected-error{{argument to '__builtin_amdgcn_raw_buffer_load_b128' must be a constant integer}}
+}
More information about the cfe-commits
mailing list