[clang] cd3acd1 - [AMDGPU] Remove unused s_barrier_{init, join, leave} instructions (#129548)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Mar 4 08:52:46 PST 2025
Author: Mariusz Sikora
Date: 2025-03-04T17:52:43+01:00
New Revision: cd3acd1bff02d0100cbe74307f29c00a3874bc41
URL: https://github.com/llvm/llvm-project/commit/cd3acd1bff02d0100cbe74307f29c00a3874bc41
DIFF: https://github.com/llvm/llvm-project/commit/cd3acd1bff02d0100cbe74307f29c00a3874bc41.diff
LOG: [AMDGPU] Remove unused s_barrier_{init,join,leave} instructions (#129548)
Added:
Modified:
clang/include/clang/Basic/BuiltinsAMDGPU.def
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/lib/Target/AMDGPU/SOPInstructions.td
llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
Removed:
llvm/test/CodeGen/AMDGPU/s-barrier.ll
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 6d00862dde5ed..44ef404aee72f 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -487,9 +487,6 @@ TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal, "vIi", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_var, "vv*i", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_wait, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_barrier_signal_isfirst, "bIi", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_init, "vv*i", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_join, "vv*", "n", "gfx12-insts")
-TARGET_BUILTIN(__builtin_amdgcn_s_barrier_leave, "vIs", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_barrier_state, "Uii", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_get_named_barrier_state, "Uiv*", "n", "gfx12-insts")
TARGET_BUILTIN(__builtin_amdgcn_s_prefetch_data, "vvC*Ui", "nc", "gfx12-insts")
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
index 1a5043328895a..5d86a9b369429 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12-param-err.cl
@@ -23,13 +23,6 @@ kernel void builtins_amdgcn_s_barrier_signal_isfirst_err(global int* in, global
*out = *in;
}
-kernel void builtins_amdgcn_s_barrier_leave_err(global int* in, global int* out, int barrier) {
-
- __builtin_amdgcn_s_barrier_signal(-1);
- __builtin_amdgcn_s_barrier_leave(barrier); // expected-error {{'__builtin_amdgcn_s_barrier_leave' must be a constant integer}}
- *out = *in;
-}
-
void test_s_buffer_prefetch_data(__amdgpu_buffer_rsrc_t rsrc, unsigned int off)
{
__builtin_amdgcn_s_buffer_prefetch_data(rsrc, off, 31); // expected-error {{'__builtin_amdgcn_s_buffer_prefetch_data' must be a constant integer}}
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
index 234ad4fd8cde6..2dba7fb719376 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
@@ -139,50 +139,6 @@ void test_s_barrier_signal_isfirst(int* a, int* b, int *c)
__builtin_amdgcn_s_barrier_wait(1);
}
-// CHECK-LABEL: @test_s_barrier_init(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
-// CHECK-NEXT: [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[A_ADDR]] to ptr
-// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: store i32 [[A:%.*]], ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
-// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[A_ADDR_ASCAST]], align 4
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) [[TMP1]], i32 [[TMP2]])
-// CHECK-NEXT: ret void
-//
-void test_s_barrier_init(void *bar, int a)
-{
- __builtin_amdgcn_s_barrier_init(bar, a);
-}
-
-// CHECK-LABEL: @test_s_barrier_join(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: [[BAR_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
-// CHECK-NEXT: [[BAR_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[BAR_ADDR]] to ptr
-// CHECK-NEXT: store ptr [[BAR:%.*]], ptr [[BAR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[BAR_ADDR_ASCAST]], align 8
-// CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[TMP0]] to ptr addrspace(3)
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) [[TMP1]])
-// CHECK-NEXT: ret void
-//
-void test_s_barrier_join(void *bar)
-{
- __builtin_amdgcn_s_barrier_join(bar);
-}
-
-// CHECK-LABEL: @test_s_barrier_leave(
-// CHECK-NEXT: entry:
-// CHECK-NEXT: call void @llvm.amdgcn.s.barrier.leave(i16 1)
-// CHECK-NEXT: ret void
-//
-void test_s_barrier_leave()
-{
- __builtin_amdgcn_s_barrier_leave(1);
-}
-
// CHECK-LABEL: @test_s_get_barrier_state(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 3118ded81d4c9..86e050333acc7 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -272,28 +272,11 @@ def int_amdgcn_s_barrier_signal_isfirst : ClangBuiltin<"__builtin_amdgcn_s_barri
Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-// void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %barrier, i32 %memberCnt)
-// The %barrier and %memberCnt argument must be uniform, otherwise behavior is undefined.
-def int_amdgcn_s_barrier_init : ClangBuiltin<"__builtin_amdgcn_s_barrier_init">,
- Intrinsic<[], [local_ptr_ty, llvm_i32_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent,
- IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-
-// void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %barrier)
-// The %barrier argument must be uniform, otherwise behavior is undefined.
-def int_amdgcn_s_barrier_join : ClangBuiltin<"__builtin_amdgcn_s_barrier_join">,
- Intrinsic<[], [local_ptr_ty], [IntrNoMem, IntrHasSideEffects, IntrConvergent, IntrWillReturn,
- IntrNoCallback, IntrNoFree]>;
-
// void @llvm.amdgcn.s.barrier.wait(i16 %barrierType)
def int_amdgcn_s_barrier_wait : ClangBuiltin<"__builtin_amdgcn_s_barrier_wait">,
Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-// void @llvm.amdgcn.s.barrier.leave(i16 %barrierType)
-def int_amdgcn_s_barrier_leave : ClangBuiltin<"__builtin_amdgcn_s_barrier_leave">,
- Intrinsic<[], [llvm_i16_ty], [ImmArg<ArgIndex<0>>, IntrNoMem, IntrHasSideEffects, IntrConvergent,
- IntrWillReturn, IntrNoCallback, IntrNoFree]>;
-
// uint32_t @llvm.amdgcn.s.get.barrier.state(i32 %barrierId)
// The %barrierType argument must be uniform, otherwise behavior is undefined.
def int_amdgcn_s_get_barrier_state : ClangBuiltin<"__builtin_amdgcn_s_get_barrier_state">,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c59fb411124c9..eb781cbd1c8da 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -2322,10 +2322,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
break;
case Intrinsic::amdgcn_ds_bvh_stack_rtn:
return selectDSBvhStackIntrinsic(I);
- case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_signal_var:
return selectNamedBarrierInit(I, IntrinsicID);
- case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_get_named_barrier_state:
return selectNamedBarrierInst(I, IntrinsicID);
case Intrinsic::amdgcn_s_get_barrier_state:
@@ -5928,8 +5926,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
switch (IntrID) {
default:
llvm_unreachable("not a named barrier op");
- case Intrinsic::amdgcn_s_barrier_join:
- return AMDGPU::S_BARRIER_JOIN_IMM;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_IMM;
};
@@ -5937,8 +5933,6 @@ unsigned getNamedBarrierOp(bool HasInlineConst, Intrinsic::ID IntrID) {
switch (IntrID) {
default:
llvm_unreachable("not a named barrier op");
- case Intrinsic::amdgcn_s_barrier_join:
- return AMDGPU::S_BARRIER_JOIN_M0;
case Intrinsic::amdgcn_s_get_named_barrier_state:
return AMDGPU::S_GET_BARRIER_STATE_M0;
};
@@ -5989,11 +5983,8 @@ bool AMDGPUInstructionSelector::selectNamedBarrierInit(
BuildMI(*MBB, &I, DL, TII.get(AMDGPU::COPY), AMDGPU::M0).addReg(TmpReg4);
constrainSelectedInstRegOperands(*CopyMIB, TII, TRI, RBI);
- unsigned Opc = IntrID == Intrinsic::amdgcn_s_barrier_init
- ? AMDGPU::S_BARRIER_INIT_M0
- : AMDGPU::S_BARRIER_SIGNAL_M0;
MachineInstrBuilder MIB;
- MIB = BuildMI(*MBB, &I, DL, TII.get(Opc));
+ MIB = BuildMI(*MBB, &I, DL, TII.get(AMDGPU::S_BARRIER_SIGNAL_M0));
I.eraseFromParent();
return true;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index b0d2a73fe31d2..e006d5140848f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -156,7 +156,6 @@ class AMDGPUInstructionSelector final : public InstructionSelector {
bool selectNamedBarrierInst(MachineInstr &I, Intrinsic::ID IID) const;
bool selectSBarrierSignalIsfirst(MachineInstr &I, Intrinsic::ID IID) const;
bool selectSGetBarrierState(MachineInstr &I, Intrinsic::ID IID) const;
- bool selectSBarrierLeave(MachineInstr &I) const;
std::pair<Register, unsigned> selectVOP3ModsImpl(Register Src,
bool IsCanonicalizing = true,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
index 57289c3e8bbf4..6568d9031987e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMemoryUtils.cpp
@@ -359,10 +359,7 @@ bool isReallyAClobber(const Value *Ptr, MemoryDef *Def, AAResults *AA) {
case Intrinsic::amdgcn_s_barrier_signal:
case Intrinsic::amdgcn_s_barrier_signal_var:
case Intrinsic::amdgcn_s_barrier_signal_isfirst:
- case Intrinsic::amdgcn_s_barrier_init:
- case Intrinsic::amdgcn_s_barrier_join:
case Intrinsic::amdgcn_s_barrier_wait:
- case Intrinsic::amdgcn_s_barrier_leave:
case Intrinsic::amdgcn_s_get_barrier_state:
case Intrinsic::amdgcn_wave_barrier:
case Intrinsic::amdgcn_sched_barrier:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 96c918a9a7f76..d79200c319b65 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -3303,10 +3303,6 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
assert(OpdMapper.getVRegs(1).empty());
constrainOpWithReadfirstlane(B, MI, 1);
return;
- case Intrinsic::amdgcn_s_barrier_join:
- constrainOpWithReadfirstlane(B, MI, 1);
- return;
- case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_signal_var:
constrainOpWithReadfirstlane(B, MI, 1);
constrainOpWithReadfirstlane(B, MI, 2);
@@ -5272,10 +5268,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_s_sleep_var:
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
break;
- case Intrinsic::amdgcn_s_barrier_join:
- OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
- break;
- case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_signal_var:
OpdsMapping[1] = getSGPROpMapping(MI.getOperand(1).getReg(), MRI, *TRI);
OpdsMapping[2] = getSGPROpMapping(MI.getOperand(2).getReg(), MRI, *TRI);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 16021eead0c9f..b4f4c6ea4dc5d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -10180,7 +10180,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
return SDValue(DAG.getMachineNode(AMDGPU::SI_END_CF, DL, MVT::Other,
Op->getOperand(2), Chain),
0);
- case Intrinsic::amdgcn_s_barrier_init:
case Intrinsic::amdgcn_s_barrier_signal_var: {
// these two intrinsics have two operands: barrier pointer and member count
SDValue Chain = Op->getOperand(0);
@@ -10188,9 +10187,6 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
SDValue BarOp = Op->getOperand(2);
SDValue CntOp = Op->getOperand(3);
SDValue M0Val;
- unsigned Opc = IntrinsicID == Intrinsic::amdgcn_s_barrier_init
- ? AMDGPU::S_BARRIER_INIT_M0
- : AMDGPU::S_BARRIER_SIGNAL_M0;
// extract the BarrierID from bits 4-9 of BarOp
SDValue BarID;
BarID = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp,
@@ -10214,40 +10210,8 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Ops.push_back(copyToM0(DAG, Chain, DL, M0Val).getValue(0));
- auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
- return SDValue(NewMI, 0);
- }
- case Intrinsic::amdgcn_s_barrier_join: {
- // these three intrinsics have one operand: barrier pointer
- SDValue Chain = Op->getOperand(0);
- SmallVector<SDValue, 2> Ops;
- SDValue BarOp = Op->getOperand(2);
- unsigned Opc;
-
- if (isa<ConstantSDNode>(BarOp)) {
- uint64_t BarVal = cast<ConstantSDNode>(BarOp)->getZExtValue();
- Opc = AMDGPU::S_BARRIER_JOIN_IMM;
-
- // extract the BarrierID from bits 4-9 of the immediate
- unsigned BarID = (BarVal >> 4) & 0x3F;
- SDValue K = DAG.getTargetConstant(BarID, DL, MVT::i32);
- Ops.push_back(K);
- Ops.push_back(Chain);
- } else {
- Opc = AMDGPU::S_BARRIER_JOIN_M0;
-
- // extract the BarrierID from bits 4-9 of BarOp, copy to M0[5:0]
- SDValue M0Val;
- M0Val = DAG.getNode(ISD::SRL, DL, MVT::i32, BarOp,
- DAG.getShiftAmountConstant(4, MVT::i32, DL));
- M0Val =
- SDValue(DAG.getMachineNode(AMDGPU::S_AND_B32, DL, MVT::i32, M0Val,
- DAG.getTargetConstant(0x3F, DL, MVT::i32)),
- 0);
- Ops.push_back(copyToM0(DAG, Chain, DL, M0Val).getValue(0));
- }
-
- auto *NewMI = DAG.getMachineNode(Opc, DL, Op->getVTList(), Ops);
+ auto *NewMI = DAG.getMachineNode(AMDGPU::S_BARRIER_SIGNAL_M0, DL,
+ Op->getVTList(), Ops);
return SDValue(NewMI, 0);
}
case Intrinsic::amdgcn_s_prefetch_data: {
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index ee263f58bcaf2..7e6bce2bf5f12 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -2078,7 +2078,6 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
case AMDGPU::S_MEMREALTIME:
case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0:
case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM:
- case AMDGPU::S_BARRIER_LEAVE:
case AMDGPU::S_GET_BARRIER_STATE_M0:
case AMDGPU::S_GET_BARRIER_STATE_IMM:
ScoreBrackets->updateByEvent(TII, TRI, MRI, SMEM_ACCESS, Inst);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 88b1e477f13e4..79ef1432d512a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -959,11 +959,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isBarrier(unsigned Opcode) const {
return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
- Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
- Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
- Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
- Opcode == AMDGPU::S_BARRIER_LEAVE ||
- Opcode == AMDGPU::S_BARRIER_LEAVE_IMM ||
Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER;
}
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index da186f7058d18..5e62ceac281b8 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -470,24 +470,6 @@ def S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_Pseudo <"s_barrier_signal_isfirst m0", (o
let isConvergent = 1;
}
-def S_BARRIER_INIT_M0 : SOP1_Pseudo <"s_barrier_init m0", (outs), (ins),
- "", []>{
- let SchedRW = [WriteBarrier];
- let isConvergent = 1;
-}
-
-def S_BARRIER_INIT_IMM : SOP1_Pseudo <"s_barrier_init", (outs),
- (ins SplitBarrier:$src0), "$src0", []>{
- let SchedRW = [WriteBarrier];
- let isConvergent = 1;
-}
-
-def S_BARRIER_JOIN_M0 : SOP1_Pseudo <"s_barrier_join m0", (outs), (ins),
- "", []>{
- let SchedRW = [WriteBarrier];
- let isConvergent = 1;
-}
-
} // End Uses = [M0]
def S_BARRIER_SIGNAL_IMM : SOP1_Pseudo <"s_barrier_signal", (outs),
@@ -503,12 +485,6 @@ def S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_Pseudo <"s_barrier_signal_isfirst", (out
let isConvergent = 1;
}
-def S_BARRIER_JOIN_IMM : SOP1_Pseudo <"s_barrier_join", (outs),
- (ins SplitBarrier:$src0), "$src0", []>{
- let SchedRW = [WriteBarrier];
- let isConvergent = 1;
-}
-
} // End has_sdst = 0
def S_GET_BARRIER_STATE_IMM : SOP1_Pseudo <"s_get_barrier_state", (outs SSrc_b32:$sdst),
@@ -1594,17 +1570,6 @@ def S_BARRIER_WAIT : SOPP_Pseudo <"s_barrier_wait", (ins i16imm:$simm16), "$simm
let isConvergent = 1;
}
-def S_BARRIER_LEAVE : SOPP_Pseudo <"s_barrier_leave", (ins)> {
- let SchedRW = [WriteBarrier];
- let simm16 = 0;
- let fixed_imm = 1;
- let isConvergent = 1;
- let Defs = [SCC];
-}
-
-def S_BARRIER_LEAVE_IMM : SOPP_Pseudo <"s_barrier_leave",
- (ins i16imm:$simm16), "$simm16", [(int_amdgcn_s_barrier_leave timm:$simm16)]>;
-
def S_WAKEUP : SOPP_Pseudo <"s_wakeup", (ins) > {
let SubtargetPredicate = isGFX8Plus;
let simm16 = 0;
@@ -2080,13 +2045,9 @@ defm S_SENDMSG_RTN_B64 : SOP1_Real_gfx11_gfx12<0x04d>;
defm S_BARRIER_SIGNAL_M0 : SOP1_M0_Real_gfx12<0x04e>;
defm S_BARRIER_SIGNAL_ISFIRST_M0 : SOP1_M0_Real_gfx12<0x04f>;
defm S_GET_BARRIER_STATE_M0 : SOP1_M0_Real_gfx12<0x050>;
-defm S_BARRIER_INIT_M0 : SOP1_M0_Real_gfx12<0x051>;
-defm S_BARRIER_JOIN_M0 : SOP1_M0_Real_gfx12<0x052>;
defm S_BARRIER_SIGNAL_IMM : SOP1_IMM_Real_gfx12<0x04e>;
defm S_BARRIER_SIGNAL_ISFIRST_IMM : SOP1_IMM_Real_gfx12<0x04f>;
defm S_GET_BARRIER_STATE_IMM : SOP1_IMM_Real_gfx12<0x050>;
-defm S_BARRIER_INIT_IMM : SOP1_IMM_Real_gfx12<0x051>;
-defm S_BARRIER_JOIN_IMM : SOP1_IMM_Real_gfx12<0x052>;
defm S_SLEEP_VAR : SOP1_IMM_Real_gfx12<0x058>;
//===----------------------------------------------------------------------===//
@@ -2563,7 +2524,6 @@ multiclass SOPP_Real_32_gfx12<bits<7> op, string name = !tolower(NAME)> {
}
defm S_BARRIER_WAIT : SOPP_Real_32_gfx12<0x014>;
-defm S_BARRIER_LEAVE : SOPP_Real_32_gfx12<0x015>;
defm S_WAIT_LOADCNT : SOPP_Real_32_gfx12<0x040>;
defm S_WAIT_STORECNT : SOPP_Real_32_gfx12<0x041>;
defm S_WAIT_SAMPLECNT : SOPP_Real_32_gfx12<0x042>;
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
index d88dc204e1336..e4b16a3fa0040 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -485,126 +485,3 @@ body: |
S_ENDPGM 0
...
----
-name: skip_barrier_init_imm
-body: |
- ; CHECK-LABEL: name: skip_barrier_init_imm
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: V_NOP_e32 implicit $exec
- ; CHECK-NEXT: $m0 = S_MOV_B32 -1
- ; CHECK-NEXT: S_BARRIER_INIT_IMM -1, implicit $m0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1, %bb.2
- S_CBRANCH_EXECZ %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- V_NOP_e32 implicit $exec
- $m0 = S_MOV_B32 -1
- S_BARRIER_INIT_IMM -1, implicit $m0
-
- bb.2:
- S_ENDPGM 0
-...
-
----
-name: skip_barrier_init_m0
-body: |
- ; CHECK-LABEL: name: skip_barrier_init_m0
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: V_NOP_e32 implicit $exec
- ; CHECK-NEXT: $m0 = S_MOV_B32 -1
- ; CHECK-NEXT: S_BARRIER_INIT_M0 implicit $m0
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1, %bb.2
- S_CBRANCH_EXECZ %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- V_NOP_e32 implicit $exec
- $m0 = S_MOV_B32 -1
- S_BARRIER_INIT_M0 implicit $m0
-
- bb.2:
- S_ENDPGM 0
-...
-
----
-name: skip_barrier_join_imm
-body: |
- ; CHECK-LABEL: name: skip_barrier_join_imm
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: V_NOP_e32 implicit $exec
- ; CHECK-NEXT: S_BARRIER_JOIN_IMM -1
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1, %bb.2
- S_CBRANCH_EXECZ %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- V_NOP_e32 implicit $exec
- S_BARRIER_JOIN_IMM -1
-
- bb.2:
- S_ENDPGM 0
-...
-
----
-name: skip_barrier_leave
-body: |
- ; CHECK-LABEL: name: skip_barrier_leave
- ; CHECK: bb.0:
- ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.2(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: V_NOP_e32 implicit $exec
- ; CHECK-NEXT: S_BARRIER_LEAVE implicit-def $scc
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2:
- ; CHECK-NEXT: S_ENDPGM 0
- bb.0:
- successors: %bb.1, %bb.2
- S_CBRANCH_EXECZ %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- V_NOP_e32 implicit $exec
- S_BARRIER_LEAVE implicit-def $scc
-
- bb.2:
- S_ENDPGM 0
-...
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
index 7cf8883082458..1f2b3e2c31892 100644
--- a/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
+++ b/llvm/test/CodeGen/AMDGPU/s-barrier-lowering.ll
@@ -11,14 +11,12 @@
define void @func1() {
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
ret void
}
define void @func2() {
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
ret void
}
@@ -26,7 +24,6 @@ define void @func2() {
define amdgpu_kernel void @kernel1() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1.kernel1, i32 11)
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 11)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar1)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
call void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3) @bar1)
%state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar1)
@@ -39,7 +36,6 @@ define amdgpu_kernel void @kernel1() #0 {
define amdgpu_kernel void @kernel2() #0 {
; CHECK-DAG: call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar1, i32 9)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar1)
call void @llvm.amdgcn.s.barrier.wait(i16 1)
call void @func2()
@@ -51,9 +47,6 @@ declare void @llvm.amdgcn.s.barrier.wait(i16) #1
declare void @llvm.amdgcn.s.barrier.signal(i32) #1
declare void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3), i32) #1
declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
-declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
-declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
-declare void @llvm.amdgcn.s.barrier.leave(i16) #1
declare void @llvm.amdgcn.s.wakeup.barrier(ptr addrspace(3)) #1
declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1
diff --git a/llvm/test/CodeGen/AMDGPU/s-barrier.ll b/llvm/test/CodeGen/AMDGPU/s-barrier.ll
deleted file mode 100644
index 1dcc6a19c29d7..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/s-barrier.ll
+++ /dev/null
@@ -1,275 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s
-
-@bar = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
-@bar2 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
-@bar3 = internal addrspace(3) global target("amdgcn.named.barrier", 0) poison
-
-define void @func1() {
-; GFX12-SDAG-LABEL: func1:
-; GFX12-SDAG: ; %bb.0:
-; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70003
-; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
-; GFX12-SDAG-NEXT: s_barrier_signal m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 3
-; GFX12-SDAG-NEXT: s_barrier_join m0
-; GFX12-SDAG-NEXT: s_barrier_wait 1
-; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-GISEL-LABEL: func1:
-; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70003
-; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
-; GFX12-GISEL-NEXT: s_barrier_signal m0
-; GFX12-GISEL-NEXT: s_barrier_join 3
-; GFX12-GISEL-NEXT: s_barrier_wait 1
-; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar3, i32 7)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar3)
- call void @llvm.amdgcn.s.barrier.wait(i16 1)
- ret void
-}
-
-define void @func2() {
-; GFX12-SDAG-LABEL: func2:
-; GFX12-SDAG: ; %bb.0:
-; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-SDAG-NEXT: s_wait_expcnt 0x0
-; GFX12-SDAG-NEXT: s_wait_samplecnt 0x0
-; GFX12-SDAG-NEXT: s_wait_bvhcnt 0x0
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70001
-; GFX12-SDAG-NEXT: s_wait_storecnt 0x0
-; GFX12-SDAG-NEXT: s_barrier_signal m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 1
-; GFX12-SDAG-NEXT: s_barrier_join m0
-; GFX12-SDAG-NEXT: s_barrier_wait 1
-; GFX12-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX12-GISEL-LABEL: func2:
-; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX12-GISEL-NEXT: s_wait_expcnt 0x0
-; GFX12-GISEL-NEXT: s_wait_samplecnt 0x0
-; GFX12-GISEL-NEXT: s_wait_bvhcnt 0x0
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70001
-; GFX12-GISEL-NEXT: s_wait_storecnt 0x0
-; GFX12-GISEL-NEXT: s_barrier_signal m0
-; GFX12-GISEL-NEXT: s_barrier_join 1
-; GFX12-GISEL-NEXT: s_barrier_wait 1
-; GFX12-GISEL-NEXT: s_setpc_b64 s[30:31]
- call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar2, i32 7)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar2)
- call void @llvm.amdgcn.s.barrier.wait(i16 1)
- ret void
-}
-
-define amdgpu_kernel void @kernel1(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
-; GFX12-SDAG-LABEL: kernel1:
-; GFX12-SDAG: ; %bb.0:
-; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX12-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX12-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x2c
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 0xc0002
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v31, v0
-; GFX12-SDAG-NEXT: s_barrier_init m0
-; GFX12-SDAG-NEXT: s_add_nc_u64 s[8:9], s[4:5], 48
-; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_lshr_b32 s2, s2, 4
-; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT: s_and_b32 s2, s2, 63
-; GFX12-SDAG-NEXT: s_or_b32 s3, 0x90000, s2
-; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-SDAG-NEXT: s_mov_b32 m0, s3
-; GFX12-SDAG-NEXT: s_barrier_init m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 0xc0002
-; GFX12-SDAG-NEXT: s_barrier_signal m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, s3
-; GFX12-SDAG-NEXT: s_barrier_signal m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
-; GFX12-SDAG-NEXT: s_barrier_signal -1
-; GFX12-SDAG-NEXT: s_barrier_signal_isfirst -1
-; GFX12-SDAG-NEXT: s_barrier_join m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
-; GFX12-SDAG-NEXT: s_barrier_wait 1
-; GFX12-SDAG-NEXT: s_barrier_leave
-; GFX12-SDAG-NEXT: s_get_barrier_state s3, m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, s2
-; GFX12-SDAG-NEXT: s_get_barrier_state s2, m0
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_getpc_b64 s[2:3]
-; GFX12-SDAG-NEXT: s_sext_i32_i16 s3, s3
-; GFX12-SDAG-NEXT: s_add_co_u32 s2, s2, func1@gotpcrel32@lo+8
-; GFX12-SDAG-NEXT: s_add_co_ci_u32 s3, s3, func1@gotpcrel32@hi+16
-; GFX12-SDAG-NEXT: s_barrier_signal -1
-; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
-; GFX12-SDAG-NEXT: s_barrier_wait -1
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[2:3]
-; GFX12-SDAG-NEXT: s_getpc_b64 s[2:3]
-; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
-; GFX12-SDAG-NEXT: s_sext_i32_i16 s3, s3
-; GFX12-SDAG-NEXT: s_add_co_u32 s2, s2, func2@gotpcrel32@lo+12
-; GFX12-SDAG-NEXT: s_wait_alu 0xfffe
-; GFX12-SDAG-NEXT: s_add_co_ci_u32 s3, s3, func2@gotpcrel32@hi+24
-; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[2:3]
-; GFX12-SDAG-NEXT: s_get_barrier_state s0, -1
-; GFX12-SDAG-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: kernel1:
-; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_mov_b64 s[12:13], s[4:5]
-; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX12-GISEL-NEXT: s_load_b32 s0, s[12:13], 0x2c
-; GFX12-GISEL-NEXT: s_mov_b32 m0, 0xc0002
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v31, v0
-; GFX12-GISEL-NEXT: s_barrier_init m0
-; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX12-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_lshr_b32 s0, s0, 4
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-GISEL-NEXT: s_and_b32 s0, s0, 63
-; GFX12-GISEL-NEXT: s_or_b32 s1, s0, 0x90000
-; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-GISEL-NEXT: s_mov_b32 m0, s1
-; GFX12-GISEL-NEXT: s_barrier_init m0
-; GFX12-GISEL-NEXT: s_mov_b32 m0, 0xc0002
-; GFX12-GISEL-NEXT: s_barrier_signal m0
-; GFX12-GISEL-NEXT: s_mov_b32 m0, s1
-; GFX12-GISEL-NEXT: s_barrier_signal m0
-; GFX12-GISEL-NEXT: s_barrier_signal -1
-; GFX12-GISEL-NEXT: s_barrier_signal_isfirst -1
-; GFX12-GISEL-NEXT: s_mov_b32 m0, s0
-; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
-; GFX12-GISEL-NEXT: s_barrier_join m0
-; GFX12-GISEL-NEXT: s_barrier_wait 1
-; GFX12-GISEL-NEXT: s_barrier_leave
-; GFX12-GISEL-NEXT: s_get_barrier_state s0, 2
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_get_barrier_state s0, m0
-; GFX12-GISEL-NEXT: s_add_co_ci_u32 s9, s13, 0
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_getpc_b64 s[0:1]
-; GFX12-GISEL-NEXT: s_sext_i32_i16 s1, s1
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, func1@gotpcrel32@lo+8
-; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, func1@gotpcrel32@hi+16
-; GFX12-GISEL-NEXT: s_barrier_signal -1
-; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX12-GISEL-NEXT: s_barrier_wait -1
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX12-GISEL-NEXT: s_add_co_u32 s8, s12, 48
-; GFX12-GISEL-NEXT: s_add_co_ci_u32 s9, s13, 0
-; GFX12-GISEL-NEXT: s_getpc_b64 s[0:1]
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_sext_i32_i16 s1, s1
-; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, func2@gotpcrel32@lo+12
-; GFX12-GISEL-NEXT: s_wait_alu 0xfffe
-; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, func2@gotpcrel32@hi+24
-; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[0:1]
-; GFX12-GISEL-NEXT: s_get_barrier_state s0, -1
-; GFX12-GISEL-NEXT: s_endpgm
- call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) @bar, i32 12)
- call void @llvm.amdgcn.s.barrier.init(ptr addrspace(3) %in, i32 9)
- call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 12)
- call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) %in, i32 9)
- call void @llvm.amdgcn.s.barrier.signal(i32 -1)
- %isfirst = call i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32 -1)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) %in)
- call void @llvm.amdgcn.s.barrier.wait(i16 1)
- call void @llvm.amdgcn.s.barrier.leave(i16 1)
- %state = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) @bar)
- %state2 = call i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3) %in)
- call void @llvm.amdgcn.s.barrier()
- call void @func1()
- call void @func2()
- %state3 = call i32 @llvm.amdgcn.s.get.barrier.state(i32 -1)
- ret void
-}
-
-define amdgpu_kernel void @kernel2(ptr addrspace(1) %out, ptr addrspace(3) %in) #0 {
-; GFX12-SDAG-LABEL: kernel2:
-; GFX12-SDAG: ; %bb.0:
-; GFX12-SDAG-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX12-SDAG-NEXT: s_getpc_b64 s[6:7]
-; GFX12-SDAG-NEXT: s_sext_i32_i16 s7, s7
-; GFX12-SDAG-NEXT: s_add_co_u32 s6, s6, func2@gotpcrel32@lo+8
-; GFX12-SDAG-NEXT: s_add_co_ci_u32 s7, s7, func2@gotpcrel32@hi+16
-; GFX12-SDAG-NEXT: v_mov_b32_e32 v31, v0
-; GFX12-SDAG-NEXT: s_load_b64 s[12:13], s[6:7], 0x0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 0x70002
-; GFX12-SDAG-NEXT: s_add_nc_u64 s[8:9], s[4:5], 48
-; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
-; GFX12-SDAG-NEXT: s_barrier_signal m0
-; GFX12-SDAG-NEXT: s_mov_b32 m0, 2
-; GFX12-SDAG-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX12-SDAG-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX12-SDAG-NEXT: s_mov_b32 s32, 0
-; GFX12-SDAG-NEXT: s_barrier_join m0
-; GFX12-SDAG-NEXT: s_barrier_wait 1
-; GFX12-SDAG-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX12-SDAG-NEXT: s_endpgm
-;
-; GFX12-GISEL-LABEL: kernel2:
-; GFX12-GISEL: ; %bb.0:
-; GFX12-GISEL-NEXT: s_add_co_u32 s8, s4, 48
-; GFX12-GISEL-NEXT: s_add_co_ci_u32 s9, s5, 0
-; GFX12-GISEL-NEXT: s_getpc_b64 s[4:5]
-; GFX12-GISEL-NEXT: s_sext_i32_i16 s5, s5
-; GFX12-GISEL-NEXT: s_add_co_u32 s4, s4, func2@gotpcrel32@lo+8
-; GFX12-GISEL-NEXT: s_add_co_ci_u32 s5, s5, func2@gotpcrel32@hi+16
-; GFX12-GISEL-NEXT: v_mov_b32_e32 v31, v0
-; GFX12-GISEL-NEXT: s_load_b64 s[12:13], s[4:5], 0x0
-; GFX12-GISEL-NEXT: s_mov_b64 s[10:11], s[6:7]
-; GFX12-GISEL-NEXT: s_mov_b32 m0, 0x70002
-; GFX12-GISEL-NEXT: s_mov_b64 s[4:5], s[0:1]
-; GFX12-GISEL-NEXT: s_mov_b64 s[6:7], s[2:3]
-; GFX12-GISEL-NEXT: s_mov_b32 s32, 0
-; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
-; GFX12-GISEL-NEXT: s_barrier_signal m0
-; GFX12-GISEL-NEXT: s_barrier_join 2
-; GFX12-GISEL-NEXT: s_barrier_wait 1
-; GFX12-GISEL-NEXT: s_swappc_b64 s[30:31], s[12:13]
-; GFX12-GISEL-NEXT: s_endpgm
- call void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3) @bar, i32 7)
- call void @llvm.amdgcn.s.barrier.join(ptr addrspace(3) @bar)
- call void @llvm.amdgcn.s.barrier.wait(i16 1)
-
- call void @func2()
- ret void
-}
-
-declare void @llvm.amdgcn.s.barrier() #1
-declare void @llvm.amdgcn.s.barrier.wait(i16) #1
-declare void @llvm.amdgcn.s.barrier.signal(i32) #1
-declare void @llvm.amdgcn.s.barrier.signal.var(ptr addrspace(3), i32) #1
-declare i1 @llvm.amdgcn.s.barrier.signal.isfirst(i32) #1
-declare void @llvm.amdgcn.s.barrier.init(ptr addrspace(3), i32) #1
-declare void @llvm.amdgcn.s.barrier.join(ptr addrspace(3)) #1
-declare void @llvm.amdgcn.s.barrier.leave(i16) #1
-declare i32 @llvm.amdgcn.s.get.barrier.state(i32) #1
-declare i32 @llvm.amdgcn.s.get.named.barrier.state(ptr addrspace(3)) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent nounwind }
-attributes #2 = { nounwind readnone }
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
index d93ea2e82c1d2..1e12e5bb48828 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sop1.s
@@ -708,24 +708,6 @@ s_barrier_signal_isfirst -1
s_barrier_signal_isfirst m0
// GFX12: encoding: [0x7d,0x4f,0x80,0xbe]
-s_barrier_init -1
-// GFX12: encoding: [0xc1,0x51,0x80,0xbe]
-
-s_barrier_init -2
-// GFX12: encoding: [0xc2,0x51,0x80,0xbe]
-
-s_barrier_init m0
-// GFX12: encoding: [0x7d,0x51,0x80,0xbe]
-
-s_barrier_join -1
-// GFX12: encoding: [0xc1,0x52,0x80,0xbe]
-
-s_barrier_join -2
-// GFX12: encoding: [0xc2,0x52,0x80,0xbe]
-
-s_barrier_join m0
-// GFX12: encoding: [0x7d,0x52,0x80,0xbe]
-
s_get_barrier_state s3, -1
// GFX12: encoding: [0xc1,0x50,0x83,0xbe]
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
index fdcabc4352c69..97b6e3ba22b0c 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_sopp.s
@@ -75,9 +75,6 @@ s_barrier_wait 0xffff
s_barrier_wait 1
// GFX12: encoding: [0x01,0x00,0x94,0xbf]
-s_barrier_leave
-// GFX12: encoding: [0x00,0x00,0x95,0xbf]
-
//===----------------------------------------------------------------------===//
// s_waitcnt
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
index 2cb6da42213e3..fa7d020bdd726 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sop1.txt
@@ -708,24 +708,6 @@
# GFX12: s_barrier_signal_isfirst m0 ; encoding: [0x7d,0x4f,0x80,0xbe]
0x7d,0x4f,0x80,0xbe
-# GFX12: s_barrier_init -1 ; encoding: [0xc1,0x51,0x80,0xbe]
-0xc1,0x51,0x80,0xbe
-
-# GFX12: s_barrier_init -2 ; encoding: [0xc2,0x51,0x80,0xbe]
-0xc2,0x51,0x80,0xbe
-
-# GFX12: s_barrier_init m0 ; encoding: [0x7d,0x51,0x80,0xbe]
-0x7d,0x51,0x80,0xbe
-
-# GFX12: s_barrier_join -1 ; encoding: [0xc1,0x52,0x80,0xbe]
-0xc1,0x52,0x80,0xbe
-
-# GFX12: s_barrier_join -2 ; encoding: [0xc2,0x52,0x80,0xbe]
-0xc2,0x52,0x80,0xbe
-
-# GFX12: s_barrier_join m0 ; encoding: [0x7d,0x52,0x80,0xbe]
-0x7d,0x52,0x80,0xbe
-
# GFX12: s_get_barrier_state s3, -1 ; encoding: [0xc1,0x50,0x83,0xbe]
0xc1,0x50,0x83,0xbe
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
index d69801512c078..266ebf3822d3b 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_sopp.txt
@@ -67,9 +67,6 @@
# GFX12: s_barrier_wait 1 ; encoding: [0x01,0x00,0x94,0xbf]
0x01,0x00,0x94,0xbf
-# GFX12: s_barrier_leave ; encoding: [0x00,0x00,0x95,0xbf]
-0x00,0x00,0x95,0xbf
-
# GFX12: s_branch 0 ; encoding: [0x00,0x00,0xa0,0xbf]
0x00,0x00,0xa0,0xbf
More information about the cfe-commits
mailing list