[llvm-branch-commits] [clang] AMDGPU/clang: Add global_load_lds size check support for gfx950 (PR #117825)
Matt Arsenault via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Nov 26 16:53:22 PST 2024
https://github.com/arsenm created https://github.com/llvm/llvm-project/pull/117825
Co-authored-by: Shilei Tian <shilei.tian at amd.com>
>From bebc41c78dbf24e10bcd053c555035397f609efb Mon Sep 17 00:00:00 2001
From: Shilei Tian <shilei.tian at amd.com>
Date: Tue, 18 Jun 2024 23:30:05 -0400
Subject: [PATCH] AMDGPU/clang: Add global_load_lds size check support for
gfx950
Co-authored-by: Shilei Tian <shilei.tian at amd.com>
---
.../clang/Basic/DiagnosticSemaKinds.td | 2 +-
clang/lib/Sema/SemaAMDGPU.cpp | 16 +++++++++-
.../CodeGenOpenCL/builtins-amdgcn-gfx950.cl | 30 +++++++++++++++++++
.../builtins-amdgcn-error-wave32.cl | 8 ++---
.../SemaOpenCL/builtins-amdgcn-gfx940-err.cl | 21 ++++++++-----
.../builtins-amdgcn-wave32-func-attr.cl | 18 +++++++++++
6 files changed, 82 insertions(+), 13 deletions(-)
create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 834e588c18e376..3317c4e93199b0 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12787,5 +12787,5 @@ def err_acc_loop_not_monotonic
// AMDGCN builtins diagnostics
def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
-def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be 1, 2, or 4">;
+def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
} // end of sema component.
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 02710e51bc0d19..a4d075dfd0768c 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -26,6 +26,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
CallExpr *TheCall) {
// position of memory order and scope arguments in the builtin
unsigned OrderIndex, ScopeIndex;
+
+ const auto *FD = SemaRef.getCurFunctionDecl();
+ assert(FD && "AMDGPU builtins should not be used outside of a function");
+ llvm::StringMap<bool> CallerFeatureMap;
+ getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
+ bool HasGFX950Insts =
+ Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
+
switch (BuiltinID) {
case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
constexpr const int SizeIdx = 2;
@@ -39,13 +47,19 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
case 2:
case 4:
return false;
+ case 12:
+ case 16: {
+ if (HasGFX950Insts)
+ return false;
+ [[fallthrough]];
+ }
default:
Diag(ArgExpr->getExprLoc(),
diag::err_amdgcn_global_load_lds_size_invalid_value)
<< ArgExpr->getSourceRange();
Diag(ArgExpr->getExprLoc(),
diag::note_amdgcn_global_load_lds_size_valid_value)
- << ArgExpr->getSourceRange();
+ << HasGFX950Insts << ArgExpr->getSourceRange();
return true;
}
}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
index 1eebece1ec22b9..f1259ef678f1e0 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
@@ -1765,3 +1765,33 @@ void test_cvt_sr_f16_f32(global half2 *out, float src, uint seed)
*out = __builtin_amdgcn_cvt_sr_f16_f32(*out, src, seed, 0);
*out = __builtin_amdgcn_cvt_sr_f16_f32(*out, src, seed, 1);
}
+
+// CHECK-LABEL: @test_global_load_lds_96(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 12, i32 0, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_global_load_lds_96(global void* src, local void *dst) {
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_global_load_lds_128(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT: [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT: store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT: store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT: call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 16, i32 0, i32 0)
+// CHECK-NEXT: ret void
+//
+void test_global_load_lds_128(global void* src, local void *dst) {
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
index e0e3872b566d9e..949ff07f761547 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
@@ -1,7 +1,7 @@
// RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify -S -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify=expected,gfx9 -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify=expected,wavefront64 -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify=expected,wavefront64 -S -o - %s
// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s
// REQUIRES: amdgpu-registered-target
@@ -14,5 +14,5 @@ void test_ballot_wave32(global uint* out, int a, int b) {
__attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
- *out = __builtin_amdgcn_ballot_w32(a == b);
+ *out = __builtin_amdgcn_ballot_w32(a == b); // wavefront64-error@*:* {{invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive}}
}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
index 2a1ba4300864cb..7cf80f7c92677d 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify -o - %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify=gfx940,expected -o - %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s
// REQUIRES: amdgpu-registered-target
typedef unsigned int u32;
@@ -7,10 +8,16 @@ void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32
__builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
__builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
- __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
- __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
- __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
- __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
- __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
- __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}}
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+}
+
+__attribute__((target("gfx950-insts")))
+void test_global_load_lds_via_target_feature(global u32* src, local u32 *dst, u32 size, u32 offset, u32 aux) {
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0);
+ __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl b/clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl
new file mode 100644
index 00000000000000..55c890368525ac
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify=default -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify=gfx9 -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify=gfx10 -S -o - %s
+// RUN: not %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature -wavefrontsize32 -S -o - %s 2>&1 | FileCheck --check-prefix=GFX9 %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify=gfx10 -S -o - %s
+
+// REQUIRES: amdgpu-registered-target
+
+// default-no-diagnostics
+// gfx10-no-diagnostics
+
+typedef unsigned int uint;
+
+// GFX9: error: option 'wavefrontsize32' cannot be specified on this target
+__attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
+void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
+ *out = __builtin_amdgcn_ballot_w32(a == b);
+}
More information about the llvm-branch-commits
mailing list