[llvm] r351351 - AMDGPU: Add llvm.amdgcn.ds.ordered.add & swap
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 17 05:45:18 PST 2019
Merged to 8.0 in r351443.
On Wed, Jan 16, 2019 at 4:47 PM Marek Olsak via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
> Author: mareko
> Date: Wed Jan 16 07:43:53 2019
> New Revision: 351351
>
> URL: http://llvm.org/viewvc/llvm-project?rev=351351&view=rev
> Log:
> AMDGPU: Add llvm.amdgcn.ds.ordered.add & swap
>
> Reviewers: arsenm, nhaehnle
>
> Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D52944
>
> Added:
> llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
> llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
> Modified:
> llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
> llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
> llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td
> llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
> llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
> llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Wed Jan 16 07:43:53 2019
> @@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin<string clang_bu
> [IntrArgMemOnly, NoCapture<0>]
> >;
>
> +class AMDGPUDSOrderedIntrinsic : Intrinsic<
> + [llvm_i32_ty],
> + // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
> + // the bit packing can be optimized at the IR level.
> + [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
> + llvm_i32_ty, // value to add or swap
> + llvm_i32_ty, // ordering
> + llvm_i32_ty, // scope
> + llvm_i1_ty, // isVolatile
> + llvm_i32_ty, // ordered count index (OA index), also added to the address
> + llvm_i1_ty, // wave release, usually set to 1
> + llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
> + [NoCapture<0>]
> +>;
> +
> +def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
> +def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
> +
> def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
> def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
> def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Wed Jan 16 07:43:53 2019
> @@ -254,7 +254,7 @@ namespace AMDGPUAS {
>
> FLAT_ADDRESS = 0, ///< Address space for flat memory.
> GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
> - REGION_ADDRESS = 2, ///< Address space for region memory.
> + REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
>
> CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
> LOCAL_ADDRESS = 3, ///< Address space for local memory.
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Jan 16 07:43:53 2019
> @@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTar
> NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
> NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
> NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
> + NODE_NAME_CASE(DS_ORDERED_COUNT)
> NODE_NAME_CASE(ATOMIC_CMP_SWAP)
> NODE_NAME_CASE(ATOMIC_INC)
> NODE_NAME_CASE(ATOMIC_DEC)
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Wed Jan 16 07:43:53 2019
> @@ -474,6 +474,7 @@ enum NodeType : unsigned {
> TBUFFER_STORE_FORMAT_D16,
> TBUFFER_LOAD_FORMAT,
> TBUFFER_LOAD_FORMAT_D16,
> + DS_ORDERED_COUNT,
> ATOMIC_CMP_SWAP,
> ATOMIC_INC,
> ATOMIC_DEC,
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td Wed Jan 16 07:43:53 2019
> @@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buff
> def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
> def : SourceOfDivergence<int_amdgcn_ps_live>;
> def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
> +def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
> +def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
>
> foreach intr = AMDGPUImageDimAtomicIntrinsics in
> def : SourceOfDivergence<intr>;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Wed Jan 16 07:43:53 2019
> @@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(Intr
> switch (Inst->getIntrinsicID()) {
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap:
> case Intrinsic::amdgcn_ds_fadd:
> case Intrinsic::amdgcn_ds_fmin:
> case Intrinsic::amdgcn_ds_fmax: {
>
> Modified: llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/DSInstructions.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/DSInstructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/DSInstructions.td Wed Jan 16 07:43:53 2019
> @@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64,
>
> defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
>
> +def : Pat <
> + (SIds_ordered_count i32:$value, i16:$offset),
> + (DS_ORDERED_COUNT $value, (as_i16imm $offset))
> +>;
> +
> //===----------------------------------------------------------------------===//
> // Real instructions
> //===----------------------------------------------------------------------===//
>
> Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Wed Jan 16 07:43:53 2019
> @@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) {
> }
> }
>
> -static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
> +static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
> + const MachineInstr &MI) {
> + if (TII.isAlwaysGDS(MI.getOpcode()))
> + return true;
> +
> switch (MI.getOpcode()) {
> case AMDGPU::S_SENDMSG:
> case AMDGPU::S_SENDMSGHALT:
> case AMDGPU::S_TTRACEDATA:
> return true;
> + // These DS opcodes don't support GDS.
> + case AMDGPU::DS_NOP:
> + case AMDGPU::DS_PERMUTE_B32:
> + case AMDGPU::DS_BPERMUTE_B32:
> + return false;
> default:
> - // TODO: GDS
> + if (TII.isDS(MI.getOpcode())) {
> + int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
> + AMDGPU::OpName::gds);
> + if (MI.getOperand(GDS).getImm())
> + return true;
> + }
> return false;
> }
> }
> @@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit
> checkReadM0Hazards(MI) > 0)
> return NoopHazard;
>
> - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
> + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
> checkReadM0Hazards(MI) > 0)
> return NoopHazard;
>
> @@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoo
> isSMovRel(MI->getOpcode())))
> return std::max(WaitStates, checkReadM0Hazards(MI));
>
> - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
> + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
> return std::max(WaitStates, checkReadM0Hazards(MI));
>
> return WaitStates;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Jan 16 07:43:53 2019
> @@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsi
> switch (IntrID) {
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap:
> case Intrinsic::amdgcn_ds_fadd:
> case Intrinsic::amdgcn_ds_fmin:
> case Intrinsic::amdgcn_ds_fmax: {
> @@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArgume
> switch (II->getIntrinsicID()) {
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap:
> case Intrinsic::amdgcn_ds_fadd:
> case Intrinsic::amdgcn_ds_fmin:
> case Intrinsic::amdgcn_ds_fmax: {
> @@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC
> SDLoc DL(Op);
>
> switch (IntrID) {
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap: {
> + MemSDNode *M = cast<MemSDNode>(Op);
> + SDValue Chain = M->getOperand(0);
> + SDValue M0 = M->getOperand(2);
> + SDValue Value = M->getOperand(3);
> + unsigned OrderedCountIndex = M->getConstantOperandVal(7);
> + unsigned WaveRelease = M->getConstantOperandVal(8);
> + unsigned WaveDone = M->getConstantOperandVal(9);
> + unsigned ShaderType;
> + unsigned Instruction;
> +
> + switch (IntrID) {
> + case Intrinsic::amdgcn_ds_ordered_add:
> + Instruction = 0;
> + break;
> + case Intrinsic::amdgcn_ds_ordered_swap:
> + Instruction = 1;
> + break;
> + }
> +
> + if (WaveDone && !WaveRelease)
> + report_fatal_error("ds_ordered_count: wave_done requires wave_release");
> +
> + switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
> + case CallingConv::AMDGPU_CS:
> + case CallingConv::AMDGPU_KERNEL:
> + ShaderType = 0;
> + break;
> + case CallingConv::AMDGPU_PS:
> + ShaderType = 1;
> + break;
> + case CallingConv::AMDGPU_VS:
> + ShaderType = 2;
> + break;
> + case CallingConv::AMDGPU_GS:
> + ShaderType = 3;
> + break;
> + default:
> + report_fatal_error("ds_ordered_count unsupported for this calling conv");
> + }
> +
> + unsigned Offset0 = OrderedCountIndex << 2;
> + unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
> + (Instruction << 4);
> + unsigned Offset = Offset0 | (Offset1 << 8);
> +
> + SDValue Ops[] = {
> + Chain,
> + Value,
> + DAG.getTargetConstant(Offset, DL, MVT::i16),
> + copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
> + };
> + return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
> + M->getVTList(), Ops, M->getMemoryVT(),
> + M->getMemOperand());
> + }
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> case Intrinsic::amdgcn_ds_fadd:
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Jan 16 07:43:53 2019
> @@ -536,10 +536,13 @@ void WaitcntBrackets::updateByEvent(cons
> CurrScore);
> }
> if (Inst.mayStore()) {
> - setExpScore(
> - &Inst, TII, TRI, MRI,
> - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
> - CurrScore);
> + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
> + AMDGPU::OpName::data0) != -1) {
> + setExpScore(
> + &Inst, TII, TRI, MRI,
> + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
> + CurrScore);
> + }
> if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
> AMDGPU::OpName::data1) != -1) {
> setExpScore(&Inst, TII, TRI, MRI,
> @@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcn
> // bracket and the destination operand scores.
> // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
> if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
> - if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
> + if (TII->isAlwaysGDS(Inst.getOpcode()) ||
> + TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
> ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
> ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
> } else {
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Jan 16 07:43:53 2019
> @@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(c
> changesVGPRIndexingMode(MI);
> }
>
> +bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
> + return Opcode == AMDGPU::DS_ORDERED_COUNT ||
> + Opcode == AMDGPU::DS_GWS_INIT ||
> + Opcode == AMDGPU::DS_GWS_SEMA_V ||
> + Opcode == AMDGPU::DS_GWS_SEMA_BR ||
> + Opcode == AMDGPU::DS_GWS_SEMA_P ||
> + Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
> + Opcode == AMDGPU::DS_GWS_BARRIER;
> +}
> +
> bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
> unsigned Opcode = MI.getOpcode();
>
> @@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhen
> // EXEC = 0, but checking for that case here seems not worth it
> // given the typical code patterns.
> if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
> - Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
> + Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
> + Opcode == AMDGPU::DS_ORDERED_COUNT)
> return true;
>
> if (MI.isInlineAsm())
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Wed Jan 16 07:43:53 2019
> @@ -450,6 +450,8 @@ public:
> return get(Opcode).TSFlags & SIInstrFlags::DS;
> }
>
> + bool isAlwaysGDS(uint16_t Opcode) const;
> +
> static bool isMIMG(const MachineInstr &MI) {
> return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
> }
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Jan 16 07:43:53 2019
> @@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::
> [SDNPMayLoad, SDNPMemOperand]
> >;
>
> +def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
> + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
> + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
> +>;
> +
> def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
> [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
> >;
>
> Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll?rev=351351&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll (added)
> +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll Wed Jan 16 07:43:53 2019
> @@ -0,0 +1,96 @@
> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
> +define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; Below are various modifications of input operands and shader types.
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_counter2:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
> +define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_nodone:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
> +define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_norelease:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
> +define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_cs:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_ps:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_vs:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_gs:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
>
> Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll?rev=351351&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll (added)
> +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll Wed Jan 16 07:43:53 2019
> @@ -0,0 +1,45 @@
> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +
> +; FUNC-LABEL: {{^}}ds_ordered_swap:
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_swap_conditional:
> +; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
> +; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
> +; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
> +; GCN: s_cbranch_execz [[BB:BB._.]]
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
> +; GCN-NEXT: [[BB]]:
> +; // Wait for expcnt(0) before modifying EXEC
> +; GCN-NEXT: s_waitcnt expcnt(0)
> +; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
> +entry:
> + %c = icmp ne i32 %value, 0
> + br i1 %c, label %if-true, label %endif
> +
> +if-true:
> + %val = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + br label %endif
> +
> +endif:
> + %v = phi i32 [ %val, %if-true ], [ undef, %entry ]
> + %r = bitcast i32 %v to float
> + ret float %r
> +}
> +
> +declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list