[llvm] r363797 - Revert rL363678 : AMDGPU: Add ds_gws_init / ds_gws_barrier intrinsics
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 19 06:00:54 PDT 2019
Author: rksimon
Date: Wed Jun 19 06:00:54 2019
New Revision: 363797
URL: http://llvm.org/viewvc/llvm-project?rev=363797&view=rev
Log:
Revert rL363678 : AMDGPU: Add ds_gws_init / ds_gws_barrier intrinsics
There may or may not be additional work to handle this correctly on
SI/CI.
........
Breaks EXPENSIVE_CHECKS buildbots - http://lab.llvm.org:8011/builders/llvm-clang-x86_64-expensive-checks-win/builds/78/
Removed:
llvm/trunk/test/CodeGen/AMDGPU/gws-hazards.mir
llvm/trunk/test/CodeGen/AMDGPU/insert-skips-gws.mir
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
llvm/trunk/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Wed Jun 19 06:00:54 2019
@@ -1348,28 +1348,6 @@ def int_amdgcn_alignbyte : Intrinsic<[ll
[IntrNoMem, IntrSpeculatable]
>;
-// llvm.amdgcn.ds.gws.init(i32 bar_val, i32 resource_id)
-//
-// bar_val is the total number of waves that will wait on this
-// barrier, minus 1.
-def int_amdgcn_ds_gws_init :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_init">,
- Intrinsic<[],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrWriteMem, IntrInaccessibleMemOnly], "",
- [SDNPMemOperand]
->;
-
-// llvm.amdgcn.ds.gws.barrier(i32 vsrc0, i32 resource_id)
-// bar_val is the total number of waves that will wait on this
-// barrier, minus 1.
-def int_amdgcn_ds_gws_barrier :
- GCCBuiltin<"__builtin_amdgcn_ds_gws_barrier">,
- Intrinsic<[],
- [llvm_i32_ty, llvm_i32_ty],
- [IntrConvergent, IntrInaccessibleMemOnly], "",
- [SDNPMemOperand]
->;
// Copies the source value to the destination value, with the guarantee that
// the source value is computed as if the entire program were executed in WQM.
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp Wed Jun 19 06:00:54 2019
@@ -218,9 +218,7 @@ private:
void SelectFMAD_FMA(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
- void SelectDS_GWS(SDNode *N, unsigned IntrID);
void SelectINTRINSIC_W_CHAIN(SDNode *N);
- void SelectINTRINSIC_VOID(SDNode *N);
protected:
// Include the pieces autogenerated from the target description.
@@ -834,10 +832,6 @@ void AMDGPUDAGToDAGISel::Select(SDNode *
SelectINTRINSIC_W_CHAIN(N);
return;
}
- case ISD::INTRINSIC_VOID: {
- SelectINTRINSIC_VOID(N);
- return;
- }
}
SelectCode(N);
@@ -2040,73 +2034,6 @@ void AMDGPUDAGToDAGISel::SelectDSAppendC
CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
-void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
- SDLoc SL(N);
- SDValue VSrc0 = N->getOperand(2);
- SDValue BaseOffset = N->getOperand(3);
- int ImmOffset = 0;
- SDNode *CopyToM0;
- MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
- MachineMemOperand *MMO = M->getMemOperand();
-
- // Don't worry if the offset ends up in a VGPR. Only one lane will have
- // effect, so SIFixSGPRCopies will validly insert readfirstlane.
-
- // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
- // offset field) % 64. Some versions of the programming guide omit the m0
- // part, or claim it's from offset 0.
- if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
- // If we have a constant offset, try to use the default value for m0 as a
- // base to possibly avoid setting it up.
- CopyToM0 = glueCopyToM0(N, CurDAG->getTargetConstant(-1, SL, MVT::i32));
- ImmOffset = ConstOffset->getZExtValue() + 1;
- } else {
- if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
- ImmOffset = BaseOffset.getConstantOperandVal(1);
- BaseOffset = BaseOffset.getOperand(0);
- }
-
- // Prefer to do the shift in an SGPR since it should be possible to use m0
- // as the result directly. If it's already an SGPR, it will be eliminated
- // later.
- SDNode *SGPROffset
- = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
- BaseOffset);
- // Shift to offset in m0
- SDNode *M0Base
- = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
- SDValue(SGPROffset, 0),
- CurDAG->getTargetConstant(16, SL, MVT::i32));
- CopyToM0 = glueCopyToM0(N, SDValue(M0Base, 0));
- }
-
- // The manual doesn't mention this, but it seems only v0 works.
- SDValue V0 = CurDAG->getRegister(AMDGPU::VGPR0, MVT::i32);
-
- SDValue CopyToV0 = CurDAG->getCopyToReg(
- SDValue(CopyToM0, 0), SL, V0, VSrc0,
- N->getOperand(N->getNumOperands() - 1));
-
- SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);
-
- // TODO: Can this just be removed from the instruction?
- SDValue GDS = CurDAG->getTargetConstant(1, SL, MVT::i1);
-
- unsigned Opc = IntrID == Intrinsic::amdgcn_ds_gws_init ?
- AMDGPU::DS_GWS_INIT : AMDGPU::DS_GWS_BARRIER;
-
- SDValue Ops[] = {
- V0,
- OffsetField,
- GDS,
- CopyToV0, // Chain
- CopyToV0.getValue(1) // Glue
- };
-
- SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
- CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
-}
-
void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) {
unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
switch (IntrID) {
@@ -2117,18 +2044,6 @@ void AMDGPUDAGToDAGISel::SelectINTRINSIC
SelectDSAppendConsume(N, IntrID);
return;
}
- }
-
- SelectCode(N);
-}
-
-void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) {
- unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
- switch (IntrID) {
- case Intrinsic::amdgcn_ds_gws_init:
- case Intrinsic::amdgcn_ds_gws_barrier:
- SelectDS_GWS(N, IntrID);
- return;
default:
break;
}
Modified: llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/DSInstructions.td?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/DSInstructions.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/DSInstructions.td Wed Jun 19 06:00:54 2019
@@ -467,15 +467,11 @@ defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET_mc<
defm DS_WRXCHG2_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>;
defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_Off8_RET_mc<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>;
-let isConvergent = 1 in {
-def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init"> {
- let mayLoad = 0;
-}
+def DS_GWS_INIT : DS_GWS_1D<"ds_gws_init">;
def DS_GWS_SEMA_V : DS_GWS_0D<"ds_gws_sema_v">;
def DS_GWS_SEMA_BR : DS_GWS_1D<"ds_gws_sema_br">;
def DS_GWS_SEMA_P : DS_GWS_0D<"ds_gws_sema_p">;
def DS_GWS_BARRIER : DS_GWS_1D<"ds_gws_barrier">;
-}
def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Jun 19 06:00:54 2019
@@ -961,24 +961,6 @@ bool SITargetLowering::getTgtMemIntrinsi
return true;
}
- case Intrinsic::amdgcn_ds_gws_init:
- case Intrinsic::amdgcn_ds_gws_barrier: {
- Info.opc = ISD::INTRINSIC_VOID;
-
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
- Info.ptrVal =
- MFI->getGWSPSV(*MF.getSubtarget<GCNSubtarget>().getInstrInfo());
-
- // This is an abstract access, but we need to specify a type and size.
- Info.memVT = MVT::i32;
- Info.size = 4;
- Info.align = 4;
-
- Info.flags = MachineMemOperand::MOStore;
- if (IntrID == Intrinsic::amdgcn_ds_gws_barrier)
- Info.flags = MachineMemOperand::MOLoad;
- return true;
- }
default:
return false;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Jun 19 06:00:54 2019
@@ -536,19 +536,15 @@ void WaitcntBrackets::updateByEvent(cons
// Put score on the source vgprs. If this is a store, just use those
// specific register(s).
if (TII->isDS(Inst) && (Inst.mayStore() || Inst.mayLoad())) {
- int AddrOpIdx =
- AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr);
// All GDS operations must protect their address register (same as
// export.)
- if (AddrOpIdx != -1) {
- setExpScore(&Inst, TII, TRI, MRI, AddrOpIdx, CurrScore);
- } else {
- assert(Inst.getOpcode() == AMDGPU::DS_APPEND ||
- Inst.getOpcode() == AMDGPU::DS_CONSUME ||
- Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
- Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER);
+ if (Inst.getOpcode() != AMDGPU::DS_APPEND &&
+ Inst.getOpcode() != AMDGPU::DS_CONSUME) {
+ setExpScore(
+ &Inst, TII, TRI, MRI,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::addr),
+ CurrScore);
}
-
if (Inst.mayStore()) {
if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
AMDGPU::OpName::data0) != -1) {
@@ -1411,6 +1407,18 @@ bool SIInsertWaitcnts::insertWaitcntInBl
ScoreBrackets.dump();
});
+ // Check to see if this is a GWS instruction. If so, and if this is CI or
+ // VI, then the generated code sequence will include an S_WAITCNT 0.
+ // TODO: Are these the only GWS instructions?
+ if (Inst.getOpcode() == AMDGPU::DS_GWS_INIT ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_V ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_BR ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
+ Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
+ // TODO: && context->target_info->GwsRequiresMemViolTest() ) {
+ ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
+ }
+
// TODO: Remove this work-around after fixing the scheduler and enable the
// assert above.
if (VCCZBugWorkAround) {
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Jun 19 06:00:54 2019
@@ -2547,8 +2547,7 @@ bool SIInstrInfo::hasUnwantedEffectsWhen
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
- Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
- Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
+ Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP)
return true;
if (MI.isCall() || MI.isInlineAsm())
Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Wed Jun 19 06:00:54 2019
@@ -43,8 +43,7 @@ class AMDGPUPseudoSourceValue : public P
public:
enum AMDGPUPSVKind : unsigned {
PSVBuffer = PseudoSourceValue::TargetCustom,
- PSVImage,
- GWSResource
+ PSVImage
};
protected:
@@ -88,30 +87,6 @@ public:
}
};
-class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
-public:
- explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
- : AMDGPUPseudoSourceValue(GWSResource, TII) {}
-
- static bool classof(const PseudoSourceValue *V) {
- return V->kind() == GWSResource;
- }
-
- // These are inaccessible memory from IR.
- bool isAliased(const MachineFrameInfo *) const override {
- return false;
- }
-
- // These are inaccessible memory from IR.
- bool mayAlias(const MachineFrameInfo *) const override {
- return false;
- }
-
- void printCustom(raw_ostream &OS) const override {
- OS << "GWSResource";
- }
-};
-
namespace yaml {
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
@@ -213,7 +188,6 @@ class SIMachineFunctionInfo final : publ
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
DenseMap<const Value *,
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
- std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
private:
unsigned LDSWaveSpillSize = 0;
@@ -700,15 +674,6 @@ public:
return PSV.first->second.get();
}
- const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
- if (!GWSResourcePSV) {
- GWSResourcePSV =
- llvm::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
- }
-
- return GWSResourcePSV.get();
- }
-
unsigned getOccupancy() const {
return Occupancy;
}
Removed: llvm/trunk/test/CodeGen/AMDGPU/gws-hazards.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/gws-hazards.mir?rev=363796&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/gws-hazards.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/gws-hazards.mir (removed)
@@ -1,103 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=VI %s
-# RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=CI %s
-# RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=SI %s
-
----
-name: m0_gws_init0
-tracksRegLiveness: true
-body: |
-
- bb.0:
- liveins: $vgpr0
- ; GFX9-LABEL: name: m0_gws_init0
- ; GFX9: liveins: $vgpr0
- ; GFX9: $m0 = S_MOV_B32 -1
- ; GFX9: S_NOP 0
- ; GFX9: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; VI-LABEL: name: m0_gws_init0
- ; VI: liveins: $vgpr0
- ; VI: $m0 = S_MOV_B32 -1
- ; VI: S_NOP 0
- ; VI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; CI-LABEL: name: m0_gws_init0
- ; CI: liveins: $vgpr0
- ; CI: $m0 = S_MOV_B32 -1
- ; CI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; SI-LABEL: name: m0_gws_init0
- ; SI: liveins: $vgpr0
- ; SI: $m0 = S_MOV_B32 -1
- ; SI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- $m0 = S_MOV_B32 -1
- DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
-
-...
-
----
-name: m0_gws_init1
-tracksRegLiveness: true
-body: |
-
- bb.0:
- ; GFX9-LABEL: name: m0_gws_init1
- ; GFX9: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX9: $m0 = S_MOV_B32 -1
- ; GFX9: S_NOP 0
- ; GFX9: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; VI-LABEL: name: m0_gws_init1
- ; VI: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; VI: $m0 = S_MOV_B32 -1
- ; VI: S_NOP 0
- ; VI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; CI-LABEL: name: m0_gws_init1
- ; CI: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CI: $m0 = S_MOV_B32 -1
- ; CI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; SI-LABEL: name: m0_gws_init1
- ; SI: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; SI: $m0 = S_MOV_B32 -1
- ; SI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- $m0 = S_MOV_B32 -1
- DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
-
-...
-
-# Test a typical situation where m0 needs to be set from a VGPR
-# through readfirstlane
----
-name: m0_gws_readlane
-tracksRegLiveness: true
-body: |
-
- bb.0:
- liveins: $vgpr0, $vgpr1
-
- ; GFX9-LABEL: name: m0_gws_readlane
- ; GFX9: liveins: $vgpr0, $vgpr1
- ; GFX9: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
- ; GFX9: $m0 = S_MOV_B32 $sgpr0
- ; GFX9: S_NOP 0
- ; GFX9: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; VI-LABEL: name: m0_gws_readlane
- ; VI: liveins: $vgpr0, $vgpr1
- ; VI: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
- ; VI: $m0 = S_MOV_B32 $sgpr0
- ; VI: S_NOP 0
- ; VI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; CI-LABEL: name: m0_gws_readlane
- ; CI: liveins: $vgpr0, $vgpr1
- ; CI: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
- ; CI: $m0 = S_MOV_B32 $sgpr0
- ; CI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; SI-LABEL: name: m0_gws_readlane
- ; SI: liveins: $vgpr0, $vgpr1
- ; SI: $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
- ; SI: $m0 = S_MOV_B32 $sgpr0
- ; SI: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- $sgpr0 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
- $m0 = S_MOV_B32 $sgpr0
- DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
-
-...
Removed: llvm/trunk/test/CodeGen/AMDGPU/insert-skips-gws.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/insert-skips-gws.mir?rev=363796&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/insert-skips-gws.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/insert-skips-gws.mir (removed)
@@ -1,59 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-skips -amdgpu-skip-threshold=1 -verify-machineinstrs %s -o - | FileCheck %s
-# Make sure mandatory skips are inserted to ensure GWS ops aren't run with exec = 0
-
----
-
-name: skip_gws_init
-body: |
- ; CHECK-LABEL: name: skip_gws_init
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
- ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; CHECK: bb.2:
- ; CHECK: S_ENDPGM 0
- bb.0:
- successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
-
- bb.2:
- S_ENDPGM 0
-...
-
----
-
-name: skip_gws_barrier
-body: |
- ; CHECK-LABEL: name: skip_gws_barrier
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
- ; CHECK: SI_MASK_BRANCH %bb.2, implicit $exec
- ; CHECK: S_CBRANCH_EXECZ %bb.2, implicit $exec
- ; CHECK: bb.1:
- ; CHECK: successors: %bb.2(0x80000000)
- ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; CHECK: DS_GWS_BARRIER $vgpr0, 0, 1, implicit $m0, implicit $exec
- ; CHECK: bb.2:
- ; CHECK: S_ENDPGM 0
- bb.0:
- successors: %bb.1, %bb.2
- SI_MASK_BRANCH %bb.2, implicit $exec
-
- bb.1:
- successors: %bb.2
- $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- DS_GWS_BARRIER $vgpr0, 0, 1, implicit $m0, implicit $exec
-
- bb.2:
- S_ENDPGM 0
-...
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll?rev=363796&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll (removed)
@@ -1,179 +0,0 @@
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIPLUS %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIPLUS %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CIPLUS %s
-
-; Minimum offset
-; GCN-LABEL: {{^}}gws_barrier_offset0:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: s_mov_b32 m0, -1{{$}}
-; GCN: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_barrier v0 offset:1 gds{{$}}
-define amdgpu_kernel void @gws_barrier_offset0(i32 %val) #0 {
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
- ret void
-}
-
-; Maximum offset
-; GCN-LABEL: {{^}}gws_barrier_offset63:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: s_mov_b32 m0, -1{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_barrier v0 offset:64 gds{{$}}
-define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 63)
- ret void
-}
-
-; FIXME: Should be able to shift directly into m0
-; GCN-LABEL: {{^}}gws_barrier_sgpr_offset:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, s[[BAR_NUM]]
-; GCN: ds_gws_barrier v0 gds{{$}}
-define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
- ret void
-}
-
-; Variable offset in SGPR with constant add
-; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, s[[BAR_NUM]]
-; GCN: ds_gws_barrier v0 offset:1 gds{{$}}
-define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
- %offset = add i32 %offset.base, 1
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
- ret void
-}
-
-; GCN-LABEL: {{^}}gws_barrier_vgpr_offset:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_barrier v0 gds{{$}}
-define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
- %vgpr.offset = call i32 @llvm.amdgcn.workitem.id.x()
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %vgpr.offset)
- ret void
-}
-
-; Variable offset in VGPR with constant add
-; GCN-LABEL: {{^}}gws_barrier_vgpr_offset_add:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_barrier v0 offset:3 gds{{$}}
-define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 {
- %vgpr.offset.base = call i32 @llvm.amdgcn.workitem.id.x()
- %vgpr.offset = add i32 %vgpr.offset.base, 3
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %vgpr.offset)
- ret void
-}
-
-@lds = internal unnamed_addr addrspace(3) global i32 undef
-
-; Check if m0 initialization is shared
-; GCN-LABEL: {{^}}gws_barrier_save_m0_barrier_constant_offset:
-; GCN: s_mov_b32 m0, -1
-; GCN-NOT: s_mov_b32 m0
-define amdgpu_kernel void @gws_barrier_save_m0_barrier_constant_offset(i32 %val) #0 {
- store i32 1, i32 addrspace(3)* @lds
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 10)
- store i32 2, i32 addrspace(3)* @lds
- ret void
-}
-
-; Make sure this increments lgkmcnt
-; GCN-LABEL: {{^}}gws_barrier_lgkmcnt:
-; GCN: ds_gws_barrier v0 offset:1 gds{{$}}
-; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
-define void @gws_barrier_lgkmcnt(i32 %val) {
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 0)
- ret void
-}
-
-; Does not imply memory fence on its own
-; GCN-LABEL: {{^}}gws_barrier_wait_before:
-; GCN: store_dword
-; CIPLUS-NOT: s_waitcnt
-; GCN: ds_gws_barrier v0 offset:8 gds
-define amdgpu_kernel void @gws_barrier_wait_before(i32 %val, i32 addrspace(1)* %ptr) #0 {
- store i32 0, i32 addrspace(1)* %ptr
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
- ret void
-}
-
-; GCN-LABEL: {{^}}gws_barrier_wait_after:
-; GCN: ds_gws_barrier v0 offset:8 gds
-; GCN-NEXT: s_waitcnt expcnt(0){{$}}
-; GCN-NEXT: load_dword
-define amdgpu_kernel void @gws_barrier_wait_after(i32 %val, i32 addrspace(1)* %ptr) #0 {
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
- %load = load volatile i32, i32 addrspace(1)* %ptr
- ret void
-}
-
-; Does not imply memory fence on its own
-; GCN-LABEL: {{^}}gws_barrier_fence_before:
-; GCN: store_dword
-; GCN: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN: ds_gws_barrier v0 offset:8 gds
-define amdgpu_kernel void @gws_barrier_fence_before(i32 %val, i32 addrspace(1)* %ptr) #0 {
- store i32 0, i32 addrspace(1)* %ptr
- fence release
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
- ret void
-}
-
-; GCN-LABEL: {{^}}gws_barrier_fence_after:
-; GCN: ds_gws_barrier v0 offset:8 gds
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: load_dword
-define amdgpu_kernel void @gws_barrier_fence_after(i32 %val, i32 addrspace(1)* %ptr) #0 {
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
- fence release
- %load = load volatile i32, i32 addrspace(1)* %ptr
- ret void
-}
-
-; FIXME: Should a wait be inserted here, or is an explicit fence needed?
-; GCN-LABEL: {{^}}gws_init_barrier:
-; GCN: s_mov_b32 m0, -1
-; GCN: ds_gws_init v0 offset:8 gds
-; GCN-NEXT: ds_gws_barrier v0 offset:8 gds
-define amdgpu_kernel void @gws_init_barrier(i32 %val) #0 {
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
- ret void
-}
-
-; FIXME: Why vmcnt, not expcnt?
-; GCN-LABEL: {{^}}gws_init_fence_barrier:
-; GCN: s_mov_b32 m0, -1
-; GCN: ds_gws_init v0 offset:8 gds
-; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GCN-NEXT: ds_gws_barrier v0 offset:8 gds
-define amdgpu_kernel void @gws_init_fence_barrier(i32 %val) #0 {
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
- fence release
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 7)
- ret void
-}
-
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-declare i32 @llvm.amdgcn.workitem.id.x() #3
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent inaccessiblememonly nounwind }
-attributes #2 = { convergent inaccessiblememonly nounwind writeonly }
-attributes #3 = { nounwind readnone speculatable }
Removed: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll?rev=363796&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll (removed)
@@ -1,119 +0,0 @@
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s
-
-; Minimum offset
-; GCN-LABEL: {{^}}gws_init_offset0:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: s_mov_b32 m0, -1{{$}}
-; GCN: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_init v0 offset:1 gds{{$}}
-define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 {
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0)
- ret void
-}
-
-; Maximum offset
-; GCN-LABEL: {{^}}gws_init_offset63:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: s_mov_b32 m0, -1{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_init v0 offset:64 gds{{$}}
-define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 63)
- ret void
-}
-
-; FIXME: Should be able to shift directly into m0
-; GCN-LABEL: {{^}}gws_init_sgpr_offset:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, s[[BAR_NUM]]
-; GCN: ds_gws_init v0 gds{{$}}
-define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
- ret void
-}
-
-; Variable offset in SGPR with constant add
-; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
-; GCN-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, s[[BAR_NUM]]
-; GCN: ds_gws_init v0 offset:1 gds{{$}}
-define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
- %offset = add i32 %offset.base, 1
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
- ret void
-}
-
-; GCN-LABEL: {{^}}gws_init_vgpr_offset:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_init v0 gds{{$}}
-define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
- %vgpr.offset = call i32 @llvm.amdgcn.workitem.id.x()
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %vgpr.offset)
- ret void
-}
-
-; Variable offset in VGPR with constant add
-; GCN-LABEL: {{^}}gws_init_vgpr_offset_add:
-; GCN-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
-; GCN-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; GCN-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; GCN-DAG: s_mov_b32 m0, [[SHL]]{{$}}
-; GCN-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
-; GCN: ds_gws_init v0 offset:3 gds{{$}}
-define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {
- %vgpr.offset.base = call i32 @llvm.amdgcn.workitem.id.x()
- %vgpr.offset = add i32 %vgpr.offset.base, 3
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %vgpr.offset)
- ret void
-}
-
-@lds = internal unnamed_addr addrspace(3) global i32 undef
-
-; Check if m0 initialization is shared.
-; GCN-LABEL: {{^}}gws_init_save_m0_init_constant_offset:
-; GCN: s_mov_b32 m0, -1
-; GCN-NOT: s_mov_b32 m0
-define amdgpu_kernel void @gws_init_save_m0_init_constant_offset(i32 %val) #0 {
- store i32 1, i32 addrspace(3)* @lds
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 10)
- store i32 2, i32 addrspace(3)* @lds
- ret void
-}
-
-; GCN-LABEL: {{^}}gws_init_lgkmcnt:
-; GCN: ds_gws_init v0 offset:1 gds{{$}}
-; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
-; GCN-NEXT: s_setpc_b64
-define void @gws_init_lgkmcnt(i32 %val) {
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 0)
- ret void
-}
-
-; Does not imply memory fence on its own
-; GCN-LABEL: {{^}}gws_init_wait_before:
-; GCN: store_dword
-; CIPLUS-NOT: s_waitcnt
-; GCN: ds_gws_init v0 offset:8 gds
-define amdgpu_kernel void @gws_init_wait_before(i32 %val, i32 addrspace(1)* %ptr) #0 {
- store i32 0, i32 addrspace(1)* %ptr
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 7)
- ret void
-}
-
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #1
-declare i32 @llvm.amdgcn.workitem.id.x() #2
-
-attributes #0 = { nounwind }
-attributes #1 = { convergent inaccessiblememonly nounwind writeonly }
-attributes #2 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/tail-duplication-convergent.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/tail-duplication-convergent.ll?rev=363797&r1=363796&r2=363797&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/tail-duplication-convergent.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/tail-duplication-convergent.ll Wed Jun 19 06:00:54 2019
@@ -6,8 +6,6 @@
declare void @nonconvergent_func() #0
declare void @convergent_func() #1
declare void @llvm.amdgcn.s.barrier() #1
-declare void @llvm.amdgcn.ds.gws.init(i32, i32) #2
-declare void @llvm.amdgcn.ds.gws.barrier(i32, i32) #2
; barrier shouldn't be duplicated.
@@ -102,52 +100,6 @@ call:
ret void
}
-; GCN-LABEL: {{^}}taildup_gws_init:
-; GCN: ds_gws_init
-; GCN-NOT: ds_gws_init
-define amdgpu_kernel void @taildup_gws_init(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %val, i32 %offset) #0 {
-entry:
- br i1 %cond, label %bb1, label %bb2
-
-bb1:
- store i32 0, i32 addrspace(1)* %a
- br label %call
-
-bb2:
- store i32 1, i32 addrspace(1)* %a
- br label %call
-
-call:
- call void @llvm.amdgcn.ds.gws.init(i32 %val, i32 %offset)
- br label %ret
-
-ret:
- ret void
-}
-
-; GCN-LABEL: {{^}}taildup_gws_barrier:
-; GCN: ds_gws_barrier
-; GCN-NOT: ds_gws_barrier
-define amdgpu_kernel void @taildup_gws_barrier(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i1 %cond, i32 %val, i32 %offset) #0 {
-entry:
- br i1 %cond, label %bb1, label %bb2
-
-bb1:
- store i32 0, i32 addrspace(1)* %a
- br label %call
-
-bb2:
- store i32 1, i32 addrspace(1)* %a
- br label %call
-
-call:
- call void @llvm.amdgcn.ds.gws.barrier(i32 %val, i32 %offset)
- br label %ret
-
-ret:
- ret void
-}
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
-attributes #2 = { convergent inaccessiblememonly nounwind }
More information about the llvm-commits
mailing list