[clang] [llvm] [AMDGPU] modify named barrier builtins and intrinsics (PR #114550)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Nov 1 08:34:59 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Gang Chen (cmc-rep)
<details>
<summary>Changes</summary>
---
Patch is 132.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/114550.diff
21 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsAMDGPU.def (+6-6)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl (+7)
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (+59-79)
- (modified) llvm/include/llvm/IR/IntrinsicsAMDGPU.td (+32-9)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (+122-73)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h (+2)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp (+124)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp (+7)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp (+22-1)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.h (+5)
- (modified) llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp (+7-16)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+6)
- (modified) llvm/lib/Target/AMDGPU/SIISelLowering.cpp (+65-42)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+2-2)
- (modified) llvm/lib/Target/AMDGPU/SOPInstructions.td (+6-4)
- (modified) llvm/test/Assembler/target-type-param-errors.ll (+5)
- (removed) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.wait.ll (-1373)
- (added) llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll (+66)
- (added) llvm/test/CodeGen/AMDGPU/s-barrier.ll (+299)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 29001e32085151..8f44afa4059386 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -439,15 +439,15 @@ TARGET_BUILTIN(__builtin_amdgcn_s_sleep_var, "vUi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_permlane16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_permlanex16_var, "UiUiUiUiIbIb", "nc", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vi", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst_var, "bi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vii", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "b", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_wakeup_barrier, "vv*", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
+TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_buffer_prefetch_data, "vQbIiUi", "nc", "gfx12-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
index 5d86a9b369429f..1a5043328895ac 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
@@ -23,6 +23,13 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
*out = *in;
}
+kernel void builtins_amdgcn_s_barrier_leave_err(global int* in, global int* out, int barrier) {
+
+ __builtin_amdgcn_s_barrier_signal(-1);
+ __builtin_amdgcn_s_barrier_leave(barrier); // expected-error {{'__builtin_amdgcn_s_barrier_leave' must be a constant integer}}
+ *out = *in;
+}
+
void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
{
__builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 9bfedac0032965..b1866a8e492c84 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -87,16 +87,21 @@ void test_s_barrier_signal()
// CHECK-LABEL: @test_s_barrier_signal_var(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(i32 [[TMP0]])
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
// CHECK-NEXT: ret void
//
-void test_s_barrier_signal_var(int a)
+void test_s_barrier_signal_var(void *bar, int a)
{
- __builtin_amdgcn_s_barrier_signal_var(a);
+ __builtin_amdgcn_s_barrier_signal_var(bar, a);
}
// CHECK-LABEL: @test_s_barrier_signal_isfirst(
@@ -134,110 +139,63 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
__builtin_amdgcn_s_barrier_wait(1);
}
-// CHECK-LABEL: @test_s_barrier_isfirst_var(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
-// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
-// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
-// CHECK-NEXT: [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[D_ADDR]] to ptr
-// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[D:%.*]], ptr [[D_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[D_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.s.barrier.signal.isfirst.var(i32 [[TMP0]])
-// CHECK-NEXT: br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label [[IF_END:%.*]]
-// CHECK: if.else:
-// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[TMP3]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.wait(i16 1)
-// CHECK-NEXT: ret void
-//
-void test_s_barrier_isfirst_var(int* a, int* b, int *c, int d)
-{
- if ( __builtin_amdgcn_s_barrier_signal_isfirst_var(d))
- a = b;
- else
- a = c;
-
- __builtin_amdgcn_s_barrier_wait(1);
-
-}
-
// CHECK-LABEL: @test_s_barrier_init(
// CHECK-NEXT: entry:
+// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(i32 1, i32 [[TMP0]])
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
+// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
// CHECK-NEXT: ret void
//
-void test_s_barrier_init(int a)
+void test_s_barrier_init(void *bar, int a)
{
- __builtin_amdgcn_s_barrier_init(1, a);
+ __builtin_amdgcn_s_barrier_init(bar, a);
}
// CHECK-LABEL: @test_s_barrier_join(
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1)
+// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]])
// CHECK-NEXT: ret void
//
-void test_s_barrier_join()
+void test_s_barrier_join(void *bar)
{
- __builtin_amdgcn_s_barrier_join(1);
+ __builtin_amdgcn_s_barrier_join(bar);
}
// CHECK-LABEL: @test_s_wakeup_barrier(
// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(i32 1)
+// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT: call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) [[TMP1]])
// CHECK-NEXT: ret void
//
-void test_s_wakeup_barrier()
+void test_s_wakeup_barrier(void *bar)
{
- __builtin_amdgcn_s_barrier_join(1);
+ __builtin_amdgcn_s_wakeup_barrier(bar);
}
// CHECK-LABEL: @test_s_barrier_leave(
// CHECK-NEXT: entry:
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[B_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[C_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
-// CHECK-NEXT: [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[B_ADDR]] to ptr
-// CHECK-NEXT: [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[C_ADDR]] to ptr
-// CHECK-NEXT: store ptr [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[B:%.*]], ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[C:%.*]], ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.s.barrier.leave()
-// CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
-// CHECK: if.then:
-// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[B_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[TMP1]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label [[IF_END:%.*]]
-// CHECK: if.else:
-// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[C_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store ptr [[TMP2]], ptr [[A_ADDR_ASCAST]], align 8
-// CHECK-NEXT: br label [[IF_END]]
-// CHECK: if.end:
+// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1)
// CHECK-NEXT: ret void
//
-void test_s_barrier_leave(int* a, int* b, int *c)
+void test_s_barrier_leave()
{
- if (__builtin_amdgcn_s_barrier_leave())
- a = b;
- else
- a = c;
+ __builtin_amdgcn_s_barrier_leave(1);
}
// CHECK-LABEL: @test_s_get_barrier_state(
@@ -261,6 +219,28 @@ unsigned test_s_get_barrier_state(int a)
return State;
}
+// CHECK-LABEL: @test_s_get_named_barrier_state(
+// CHECK-NEXT: entry:
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
+// CHECK-NEXT: [[STATE:%.*]] = alloca i32, align 4, addrspace(5)
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
+// CHECK-NEXT: [[STATE_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[STATE]] to ptr
+// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
+// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
+// CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) [[TMP1]])
+// CHECK-NEXT: store i32 [[TMP2]], ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[STATE_ASCAST]], align 4
+// CHECK-NEXT: ret i32 [[TMP3]]
+//
+unsigned test_s_get_named_barrier_state(void *bar)
+{
+ unsigned State = __builtin_amdgcn_s_get_named_barrier_state(bar);
+ return State;
+}
+
// CHECK-LABEL: @test_s_ttracedata(
// CHECK-NEXT: entry:
// CHECK-NEXT: call void @llvm.amdgcn.s.ttracedata(i32 1)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 143b538b361c9c..d6375ab77cfb32 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===//
def global_ptr_ty : LLVMQualPointerType<1>;
+def local_ptr_ty : LLVMQualPointerType<3>;
// The amdgpu-no-* attributes (ex amdgpu-no-workitem-id-z) typically inferred
// by the backend cause whole-program undefined behavior when violated, such as
@@ -247,48 +248,70 @@ def int_amdgcn_s_sendmsghalt : ClangBuiltin<"__builtin_amdgcn_s_sendmsghalt">,
def int_amdgcn_s_sendmsg_rtn : Intrinsic <[llvm_anyint_ty], [llvm_i32_ty],
[ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects]>;
+// Vanilla workgroup sync-barrier
def int_amdgcn_s_barrier : ClangBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+// Lower-level split-barrier intrinsics
+
+// void @llvm.amdgcn.s.barrier.signal(i32 %barrierType)
+// only for non-named barrier
def int_amdgcn_s_barrier_signal : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal">,
Intrinsic<[], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;
+// void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %barrier, i32 %memberCnt)
+// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_barrier_signal_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_var">,
- Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+ Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;
+// bool @llvm.amdgcn.s.barrier.signal.isfirst(i32 %barrierType)
+// only for non-named barrier
def int_amdgcn_s_barrier_signal_isfirst : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst">,
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-def int_amdgcn_s_barrier_signal_isfirst_var : ClangBuiltin<"__builtin_amdgcn_s_barrier_signal_isfirst_var">,
- Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
- IntrNoCallback, IntrNoFree]>;
-
+// void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %barrier, i32 %memberCnt)
+// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_barrier_init : ClangBuiltin<"__builtin_amdgcn_s_barrier_init">,
- Intrinsic<[], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
+ Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+// void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
- Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+ Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;
+// void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_wakeup_barrier : ClangBuiltin<"__builtin_amdgcn_s_wakeup_barrier">,
- Intrinsic<[], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+ Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;
+// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+// void @llvm.amdgcn.s.barrier.leave(i16 %barrierType)
def int_amdgcn_s_barrier_leave : ClangBuiltin<"__builtin_amdgcn_s_barrier_leave">,
- Intrinsic<[llvm_i1_ty], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+ Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
+ IntrWillReturn, IntrNoCallback, IntrNoFree]>;
+// uint32_t @llvm.amdgcn.s.get.barrier.state(i32 %barrierId)
+// The %barrierType argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_get_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_barrier_state">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
IntrNoCallback, IntrNoFree]>;
+// uint32_t @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %barrier)
+// The %barrier argument must be uniform, otherwise behavior is undefined.
+def int_amdgcn_s_get_named_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_named_barrier_state">,
+ Intrinsic<[llvm_i32_ty], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
+ IntrNoCallback, IntrNoFree]>;
+
def int_amdgcn_wave_barrier : ClangBuiltin<"__builtin_amdgcn_wave_barrier">,
Intrinsic<[], [], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn, IntrNoCallback, IntrNoFree]>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index e4b54c7d72b083..8c640ec18e1a49 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -16,6 +16,7 @@
#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
#include "AMDGPUMachineFunction.h"
+#include "AMDGPUMemoryUtils.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -1508,7 +1509,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) {
if (!MFI->isModuleEntryFunction() &&
- GV->getName() != "llvm.amdgcn.module.lds") {
+ GV->getName() != "llvm.amdgcn.module.lds" &&
+ !AMDGPU::isNamedBarrier(*cast<GlobalVariable>(GV))) {
SDLoc DL(Op);
const Function &Fn = DAG.getMachineFunction().getFunction();
DiagnosticInfoUnsupported BadLDSDecl(
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 800bdbe04cf70d..1873251ea358b1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2181,15 +2181,16 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
return selectDSBvhStackIntrinsic(I);
case Intrinsic::amdgcn_s_barrier_init:
+ case Intrinsic::amdgcn_s_barrier_signal_var:
+ return selectNamedBarrierInit(I, IntrinsicID);
case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_wakeup_barrier:
- case Intrinsic::amdgcn_s_get_barrier_state:
+ case Intrinsic::amdgcn_s_get_named_barrier_state:
ret...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/114550
More information about the cfe-commits
mailing list