[clang] [llvm] [AMDGPU] modify named barrier builtins and intrinsics (PR #114550)

via cfe-commits cfe-commits at lists.llvm.org
Fri Nov 1 08:34:59 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Gang Chen (cmc-rep)

<details>
<summary>Changes</summary>



---

Patch is 132.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114550.diff


21 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+6-6) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl (+7) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+59-79) 
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+32-9) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+3-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+122-73) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (+2) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+3-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp (+124) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (+7) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp (+22-1) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.h (+5) 
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+7-16) 
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6) 
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+65-42) 
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2-2) 
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+6-4) 
- (modified) llvm/test/Assembler/target-type-param-errors.ll (+5) 
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll (-1373) 
- (added) llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll (+66) 
- (added) llvm/test/CodeGen/AMDGPU/s-barrier.ll (+299) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 29001e32085151..8f44afa4059386 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -439,15 +439,15 @@ TARGET_BUILTIN(__builtin_amdgcn_s_sleep_var, "vUi", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_permlane16_var,  "UiUiUiUiIbIb", "nc", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_permlanex16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vi", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst_var, "bi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vii", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
 TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")
 
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
index 5d86a9b369429f..1a5043328895ac 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
@@ -23,6 +23,13 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
   *out = *in;
 }
 
+kernel void builtins_amdgcn_s_barrier_leave_err(global int* in, global int* out, int barrier) {
+
+  __builtin_amdgcn_s_barrier_signal(-1);
+  __builtin_amdgcn_s_barrier_leave(barrier); // expected-error {{'__builtin_amdgcn_s_barrier_leave' must be a constant integer}}
+  *out = *in;
+}
+
 void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
 {
   __builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 9bfedac0032965..b1866a8e492c84 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -87,16 +87,21 @@ void test_s_barrier_signal()
 
 // CHECK-LABEL: @test_s_barrier_signal_var(
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
 // CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.signal.var(i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
 // CHECK-NEXT:    ret void
 //
-void test_s_barrier_signal_var(int a)
+void test_s_barrier_signal_var(void *bar, int a)
 {
-  __builtin_amdgcn_s_barrier_signal_var(a);
+  __builtin_amdgcn_s_barrier_signal_var(bar, a);
 }
 
 // CHECK-LABEL: @test_s_barrier_signal_isfirst(
@@ -134,110 +139,63 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
   __builtin_amdgcn_s_barrier_wait(1);
 }
 
-// CHECK-LABEL: @test_s_barrier_isfirst_var(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
-// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
-// CHECK-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
-// CHECK-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
-// CHECK-NEXT:    store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32 [[TMP0]])
-// CHECK-NEXT:    br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK:       if.then:
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    br label [[IF_END:%.*]]
-// CHECK:       if.else:
-// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[TMP3]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    br label [[IF_END]]
-// CHECK:       if.end:
-// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.wait(i16 1)
-// CHECK-NEXT:    ret void
-//
-void test_s_barrier_isfirst_var(int* a, int* b, int *c, int d)
-{
- if ( __builtin_amdgcn_s_barrier_signal_isfirst_var(d))
-    a = b;
-  else
-    a = c;
-
-  __builtin_amdgcn_s_barrier_wait(1);
-
-}
-
 // CHECK-LABEL: @test_s_barrier_init(
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
 // CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.init(i32 1, i32 [[TMP0]])
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
 // CHECK-NEXT:    ret void
 //
-void test_s_barrier_init(int a)
+void test_s_barrier_init(void *bar, int a)
 {
-  __builtin_amdgcn_s_barrier_init(1, a);
+  __builtin_amdgcn_s_barrier_init(bar, a);
 }
 
 // CHECK-LABEL: @test_s_barrier_join(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.join(i32 1)
+// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-void test_s_barrier_join()
+void test_s_barrier_join(void *bar)
 {
-  __builtin_amdgcn_s_barrier_join(1);
+  __builtin_amdgcn_s_barrier_join(bar);
 }
 
 // CHECK-LABEL: @test_s_wakeup_barrier(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.join(i32 1)
+// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
 // CHECK-NEXT:    ret void
 //
-void test_s_wakeup_barrier()
+void test_s_wakeup_barrier(void *bar)
 {
-  __builtin_amdgcn_s_barrier_join(1);
+  __builtin_amdgcn_s_wakeup_barrier(bar);
 }
 
 // CHECK-LABEL: @test_s_barrier_leave(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
-// CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
-// CHECK-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
-// CHECK-NEXT:    store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.leave()
-// CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK:       if.then:
-// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    br label [[IF_END:%.*]]
-// CHECK:       if.else:
-// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT:    br label [[IF_END]]
-// CHECK:       if.end:
+// CHECK-NEXT:    call void @llvm.amdgcn.s.barrier.leave(i16 1)
 // CHECK-NEXT:    ret void
 //
-void test_s_barrier_leave(int* a, int* b, int *c)
+void test_s_barrier_leave()
 {
-  if (__builtin_amdgcn_s_barrier_leave())
-    a = b;
-  else
-    a = c;
+  __builtin_amdgcn_s_barrier_leave(1);
 }
 
 // CHECK-LABEL: @test_s_get_barrier_state(
@@ -261,6 +219,28 @@ unsigned test_s_get_barrier_state(int a)
   return State;
 }
 
+// CHECK-LABEL: @test_s_get_named_barrier_state(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT:    [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT:    [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT:    [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
+// CHECK-NEXT:    store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT:    [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+unsigned test_s_get_named_barrier_state(void *bar)
+{
+  unsigned State = __builtin_amdgcn_s_get_named_barrier_state(bar);
+  return State;
+}
+
 // CHECK-LABEL: @test_s_ttracedata(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    call void @llvm.amdgcn.s.ttracedata(i32 1)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 143b538b361c9c..d6375ab77cfb32 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 def global_ptr_ty : LLVMQualPointerType<1>;
+def local_ptr_ty : LLVMQualPointerType<3>;
 
 // The amdgpu-no-* attributes (ex amdgpu-no-workitem-id-z) typically inferred
 // by the backend cause whole-program undefined behavior when violated, such as
@@ -247,48 +248,70 @@ def int_amdgcn_s_sendmsghalt : ClangBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
 def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty],
   [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
 
+// Vanilla workgroup sync-barrier
 def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">,
   Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
+// Lower-level split-barrier intrinsics
+
+// void @llvm.amdgcn.s.barrier.signal(i32 %barrierType)
+// only for non-named barrier
 def int_amdgcn_s_barrier_signal : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal">,
   Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
                                 IntrNoCallback, IntrNoFree]>;
 
+// void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %barrier, i32 %memberCnt)
+// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined.
 def int_amdgcn_s_barrier_signal_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_var">,
-  Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+  Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
                                 IntrNoCallback, IntrNoFree]>;
 
+// bool @llvm.amdgcn.s.barrier.signal.isfirst(i32 %barrierType)
+// only for non-named barrier
 def int_amdgcn_s_barrier_signal_isfirst : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst">,
   Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
                                 IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
-def int_amdgcn_s_barrier_signal_isfirst_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst_var">,
-  Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
-                                IntrNoCallback, IntrNoFree]>;
-
+// void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %barrier, i32 %memberCnt)
+// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined.
 def int_amdgcn_s_barrier_init : ClangBuiltin<"__builtin_amdgcn_s_barrier_init">,
-  Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
+  Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
                                 IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
+// void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
 def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
-  Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+  Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
                                 IntrNoCallback, IntrNoFree]>;
 
+// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
 def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
-  Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+  Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
                                 IntrNoCallback, IntrNoFree]>;
 
+// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
 def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
   Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
                                 IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
+// void @llvm.amdgcn.s.barrier.leave(i16 %barrierType)
 def int_amdgcn_s_barrier_leave : ClangBuiltin<"__builtin_amdgcn_s_barrier_leave">,
-  Intrinsic<[llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+  Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
+                                IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
+// uint32_t @llvm.amdgcn.s.get.barrier.state(i32 %barrierId)
+// The %barrierType argument must be uniform, otherwise behavior is undefined.
 def int_amdgcn_s_get_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_barrier_state">,
   Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
                                 IntrNoCallback, IntrNoFree]>;
 
+// uint32_t @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
+def int_amdgcn_s_get_named_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_named_barrier_state">,
+  Intrinsic<[llvm_i32_ty], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+                                IntrNoCallback, IntrNoFree]>;
+
 def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">,
   Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
 
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e4b54c7d72b083..8c640ec18e1a49 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -16,6 +16,7 @@
 #include "AMDGPU.h"
 #include "AMDGPUInstrInfo.h"
 #include "AMDGPUMachineFunction.h"
+#include "AMDGPUMemoryUtils.h"
 #include "SIMachineFunctionInfo.h"
 #include "llvm/CodeGen/Analysis.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -1508,7 +1509,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
   if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
     if (!MFI->isModuleEntryFunction() &&
-        GV->getName() != "llvm.amdgcn.module.lds") {
+        GV->getName() != "llvm.amdgcn.module.lds" &&
+        !AMDGPU::isNamedBarrier(*cast<GlobalVariable>(GV))) {
       SDLoc DL(Op);
       const Function &Fn = DAG.getMachineFunction().getFunction();
       DiagnosticInfoUnsupported BadLDSDecl(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 800bdbe04cf70d..1873251ea358b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2181,15 +2181,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
   case Intrinsic::amdgcn_ds_bvh_stack_rtn:
     return selectDSBvhStackIntrinsic(I);
   case Intrinsic::amdgcn_s_barrier_init:
+  case Intrinsic::amdgcn_s_barrier_signal_var:
+    return selectNamedBarrierInit(I, IntrinsicID);
   case Intrinsic::amdgcn_s_barrier_join:
   case Intrinsic::amdgcn_s_wakeup_barrier:
-  case Intrinsic::amdgcn_s_get_barrier_state:
+  case Intrinsic::amdgcn_s_get_named_barrier_state:
     ret...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/114550


More information about the cfe-commits mailing list