[llvm-branch-commits] [clang] AMDGPU/clang: Add global_load_lds size check support for gfx950 (PR #117825)

Matt Arsenault via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Nov 26 17:04:15 PST 2024


https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/117825

>From ff1c11253fc5f220ddc92f1733d2151d2e5c54df Mon Sep 17 00:00:00 2001
From: Shilei Tian <shilei.tian at amd.com>
Date: Tue, 18 Jun 2024 23:30:05 -0400
Subject: [PATCH] AMDGPU/clang: Add global_load_lds size check support for
 gfx950

Co-authored-by: Shilei Tian <shilei.tian at amd.com>
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +-
 clang/lib/Sema/SemaAMDGPU.cpp                 | 16 +++++++++-
 .../CodeGenOpenCL/builtins-amdgcn-gfx950.cl   | 30 +++++++++++++++++++
 .../builtins-amdgcn-error-wave32.cl           |  8 ++---
 .../SemaOpenCL/builtins-amdgcn-gfx940-err.cl  | 21 ++++++++-----
 .../builtins-amdgcn-wave32-func-attr.cl       | 18 +++++++++++
 6 files changed, 82 insertions(+), 13 deletions(-)
 create mode 100644 clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 834e588c18e376..3317c4e93199b0 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -12787,5 +12787,5 @@ def err_acc_loop_not_monotonic
 
 // AMDGCN builtins diagnostics
 def err_amdgcn_global_load_lds_size_invalid_value : Error<"invalid size value">;
-def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be 1, 2, or 4">;
+def note_amdgcn_global_load_lds_size_valid_value : Note<"size must be %select{1, 2, or 4|1, 2, 4, 12 or 16}0">;
 } // end of sema component.
diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp
index 02710e51bc0d19..a4d075dfd0768c 100644
--- a/clang/lib/Sema/SemaAMDGPU.cpp
+++ b/clang/lib/Sema/SemaAMDGPU.cpp
@@ -26,6 +26,14 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
                                                 CallExpr *TheCall) {
   // position of memory order and scope arguments in the builtin
   unsigned OrderIndex, ScopeIndex;
+
+  const auto *FD = SemaRef.getCurFunctionDecl();
+  assert(FD && "AMDGPU builtins should not be used outside of a function");
+  llvm::StringMap<bool> CallerFeatureMap;
+  getASTContext().getFunctionFeatureMap(CallerFeatureMap, FD);
+  bool HasGFX950Insts =
+      Builtin::evaluateRequiredTargetFeatures("gfx950-insts", CallerFeatureMap);
+
   switch (BuiltinID) {
   case AMDGPU::BI__builtin_amdgcn_global_load_lds: {
     constexpr const int SizeIdx = 2;
@@ -39,13 +47,19 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID,
     case 2:
     case 4:
       return false;
+    case 12:
+    case 16: {
+      if (HasGFX950Insts)
+        return false;
+      [[fallthrough]];
+    }
     default:
       Diag(ArgExpr->getExprLoc(),
            diag::err_amdgcn_global_load_lds_size_invalid_value)
           << ArgExpr->getSourceRange();
       Diag(ArgExpr->getExprLoc(),
            diag::note_amdgcn_global_load_lds_size_valid_value)
-          << ArgExpr->getSourceRange();
+          << HasGFX950Insts << ArgExpr->getSourceRange();
       return true;
     }
   }
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
index 1eebece1ec22b9..f1259ef678f1e0 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950.cl
@@ -1765,3 +1765,33 @@ void test_cvt_sr_f16_f32(global half2 *out, float src, uint seed)
   *out = __builtin_amdgcn_cvt_sr_f16_f32(*out, src, seed, 0);
   *out = __builtin_amdgcn_cvt_sr_f16_f32(*out, src, seed, 1);
 }
+
+// CHECK-LABEL: @test_global_load_lds_96(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT:    store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT:    store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 12, i32 0, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_global_load_lds_96(global void* src, local void *dst) {
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0);
+}
+
+// CHECK-LABEL: @test_global_load_lds_128(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[SRC_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
+// CHECK-NEXT:    [[DST_ADDR:%.*]] = alloca ptr addrspace(3), align 4, addrspace(5)
+// CHECK-NEXT:    store ptr addrspace(1) [[SRC:%.*]], ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT:    store ptr addrspace(3) [[DST:%.*]], ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[SRC_ADDR]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr addrspace(3), ptr addrspace(5) [[DST_ADDR]], align 4
+// CHECK-NEXT:    call void @llvm.amdgcn.global.load.lds(ptr addrspace(1) [[TMP0]], ptr addrspace(3) [[TMP1]], i32 16, i32 0, i32 0)
+// CHECK-NEXT:    ret void
+//
+void test_global_load_lds_128(global void* src, local void *dst) {
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
+}
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
index e0e3872b566d9e..949ff07f761547 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-error-wave32.cl
@@ -1,7 +1,7 @@
 // RUN: %clang_cc1 -triple amdgcn-- -verify -S -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify -S -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify -S -o - %s
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify=expected,gfx9 -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature +wavefrontsize64 -verify=expected,wavefront64 -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature +wavefrontsize64 -verify=expected,wavefront64 -S -o - %s
 // RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify -S -o - %s
 
 // REQUIRES: amdgpu-registered-target
@@ -14,5 +14,5 @@ void test_ballot_wave32(global uint* out, int a, int b) {
 
 __attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
 void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
-  *out = __builtin_amdgcn_ballot_w32(a == b);
+  *out = __builtin_amdgcn_ballot_w32(a == b); // wavefront64-error@*:* {{invalid feature combination: 'wavefrontsize32' and 'wavefrontsize64' are mutually exclusive}}
 }
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
index 2a1ba4300864cb..7cf80f7c92677d 100644
--- a/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-gfx940-err.cl
@@ -1,4 +1,5 @@
-// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify -o - %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -S -verify=gfx940,expected -o - %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx950 -S -verify=gfx950,expected -o - %s
 // REQUIRES: amdgpu-registered-target
 
 typedef unsigned int u32;
@@ -7,10 +8,16 @@ void test_global_load_lds_unsupported_size(global u32* src, local u32 *dst, u32
   __builtin_amdgcn_global_load_lds(src, dst, size, /*offset=*/0, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
   __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, offset, /*aux=*/0); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
   __builtin_amdgcn_global_load_lds(src, dst, /*size=*/4, /*offset=*/0, aux); // expected-error{{argument to '__builtin_amdgcn_global_load_lds' must be a constant integer}}
-  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
-  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
-  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
-  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
-  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
-  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} expected-note {{size must be 1, 2, or 4}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/5, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/0, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/3, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0); // gfx940-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}}
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/-1, /*offset=*/0, /*aux=*/0); // expected-error{{invalid size value}} gfx940-note {{size must be 1, 2, or 4}} gfx950-note {{size must be 1, 2, 4, 12 or 16}}
+}
+
+__attribute__((target("gfx950-insts")))
+void test_global_load_lds_via_target_feature(global u32* src, local u32 *dst, u32 size, u32 offset, u32 aux) {
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/12, /*offset=*/0, /*aux=*/0);
+  __builtin_amdgcn_global_load_lds(src, dst, /*size=*/16, /*offset=*/0, /*aux=*/0);
 }
diff --git a/clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl b/clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl
new file mode 100644
index 00000000000000..55c890368525ac
--- /dev/null
+++ b/clang/test/SemaOpenCL/builtins-amdgcn-wave32-func-attr.cl
@@ -0,0 +1,18 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify=default -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -verify=gfx9 -S -o - %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -verify=gfx10 -S -o - %s
+// RUN: not %clang_cc1 -triple amdgcn-- -target-cpu gfx900 -target-feature -wavefrontsize32 -S -o - %s 2>&1 | FileCheck --check-prefix=GFX9 %s
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1010 -target-feature -wavefrontsize32 -verify=gfx10 -S -o - %s
+
+// REQUIRES: amdgpu-registered-target
+
+// default-no-diagnostics
+// gfx10-no-diagnostics
+
+typedef unsigned int uint;
+
+// GFX9: error: option 'wavefrontsize32' cannot be specified on this target
+__attribute__((target("wavefrontsize32"))) // gfx9-error@*:* {{option 'wavefrontsize32' cannot be specified on this target}}
+void test_ballot_wave32_target_attr(global uint* out, int a, int b) {
+  *out = __builtin_amdgcn_ballot_w32(a == b);
+}



More information about the llvm-branch-commits mailing list