[llvm] r351351 - AMDGPU: Add llvm.amdgcn.ds.ordered.add & swap
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 17 05:45:18 PST 2019
Merged to 8.0 in r351443.
On Wed, Jan 16, 2019 at 4:47 PM Marek Olsak via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
>
> Author: mareko
> Date: Wed Jan 16 07:43:53 2019
> New Revision: 351351
>
> URL: http://llvm.org/viewvc/llvm-project?rev=351351&view=rev
> Log:
> AMDGPU: Add llvm.amdgcn.ds.ordered.add & swap
>
> Reviewers: arsenm, nhaehnle
>
> Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D52944
>
> Added:
> llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
> llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
> Modified:
> llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
> llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
> llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td
> llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
> llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
> llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
> llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
>
> Modified: llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td (original)
> +++ llvm/trunk/include/llvm/IR/IntrinsicsAMDGPU.td Wed Jan 16 07:43:53 2019
> @@ -392,6 +392,24 @@ class AMDGPULDSF32Intrin<string clang_bu
> [IntrArgMemOnly, NoCapture<0>]
> >;
>
> +class AMDGPUDSOrderedIntrinsic : Intrinsic<
> + [llvm_i32_ty],
> + // M0 = {hi16:address, lo16:waveID}. Allow passing M0 as a pointer, so that
> + // the bit packing can be optimized at the IR level.
> + [LLVMQualPointerType<llvm_i32_ty, 2>, // IntToPtr(M0)
> + llvm_i32_ty, // value to add or swap
> + llvm_i32_ty, // ordering
> + llvm_i32_ty, // scope
> + llvm_i1_ty, // isVolatile
> + llvm_i32_ty, // ordered count index (OA index), also added to the address
> + llvm_i1_ty, // wave release, usually set to 1
> + llvm_i1_ty], // wave done, set to 1 for the last ordered instruction
> + [NoCapture<0>]
> +>;
> +
> +def int_amdgcn_ds_ordered_add : AMDGPUDSOrderedIntrinsic;
> +def int_amdgcn_ds_ordered_swap : AMDGPUDSOrderedIntrinsic;
> +
> def int_amdgcn_ds_fadd : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_faddf">;
> def int_amdgcn_ds_fmin : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fminf">;
> def int_amdgcn_ds_fmax : AMDGPULDSF32Intrin<"__builtin_amdgcn_ds_fmaxf">;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.h?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPU.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.h Wed Jan 16 07:43:53 2019
> @@ -254,7 +254,7 @@ namespace AMDGPUAS {
>
> FLAT_ADDRESS = 0, ///< Address space for flat memory.
> GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0).
> - REGION_ADDRESS = 2, ///< Address space for region memory.
> + REGION_ADDRESS = 2, ///< Address space for region memory. (GDS)
>
> CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2)
> LOCAL_ADDRESS = 3, ///< Address space for local memory.
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Jan 16 07:43:53 2019
> @@ -4192,6 +4192,7 @@ const char* AMDGPUTargetLowering::getTar
> NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16)
> NODE_NAME_CASE(TBUFFER_LOAD_FORMAT)
> NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16)
> + NODE_NAME_CASE(DS_ORDERED_COUNT)
> NODE_NAME_CASE(ATOMIC_CMP_SWAP)
> NODE_NAME_CASE(ATOMIC_INC)
> NODE_NAME_CASE(ATOMIC_DEC)
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Wed Jan 16 07:43:53 2019
> @@ -474,6 +474,7 @@ enum NodeType : unsigned {
> TBUFFER_STORE_FORMAT_D16,
> TBUFFER_LOAD_FORMAT,
> TBUFFER_LOAD_FORMAT_D16,
> + DS_ORDERED_COUNT,
> ATOMIC_CMP_SWAP,
> ATOMIC_INC,
> ATOMIC_DEC,
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSearchableTables.td Wed Jan 16 07:43:53 2019
> @@ -72,6 +72,8 @@ def : SourceOfDivergence<int_amdgcn_buff
> def : SourceOfDivergence<int_amdgcn_buffer_atomic_cmpswap>;
> def : SourceOfDivergence<int_amdgcn_ps_live>;
> def : SourceOfDivergence<int_amdgcn_ds_swizzle>;
> +def : SourceOfDivergence<int_amdgcn_ds_ordered_add>;
> +def : SourceOfDivergence<int_amdgcn_ds_ordered_swap>;
>
> foreach intr = AMDGPUImageDimAtomicIntrinsics in
> def : SourceOfDivergence<intr>;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp Wed Jan 16 07:43:53 2019
> @@ -308,6 +308,8 @@ bool GCNTTIImpl::getTgtMemIntrinsic(Intr
> switch (Inst->getIntrinsicID()) {
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap:
> case Intrinsic::amdgcn_ds_fadd:
> case Intrinsic::amdgcn_ds_fmin:
> case Intrinsic::amdgcn_ds_fmax: {
>
> Modified: llvm/trunk/lib/Target/AMDGPU/DSInstructions.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/DSInstructions.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/DSInstructions.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/DSInstructions.td Wed Jan 16 07:43:53 2019
> @@ -817,6 +817,11 @@ defm : DSAtomicRetPat_mc<DS_MAX_RTN_U64,
>
> defm : DSAtomicCmpXChg_mc<DS_CMPST_RTN_B64, i64, "atomic_cmp_swap_local">;
>
> +def : Pat <
> + (SIds_ordered_count i32:$value, i16:$offset),
> + (DS_ORDERED_COUNT $value, (as_i16imm $offset))
> +>;
> +
> //===----------------------------------------------------------------------===//
> // Real instructions
> //===----------------------------------------------------------------------===//
>
> Modified: llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/GCNHazardRecognizer.cpp Wed Jan 16 07:43:53 2019
> @@ -88,14 +88,28 @@ static bool isSMovRel(unsigned Opcode) {
> }
> }
>
> -static bool isSendMsgTraceDataOrGDS(const MachineInstr &MI) {
> +static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
> + const MachineInstr &MI) {
> + if (TII.isAlwaysGDS(MI.getOpcode()))
> + return true;
> +
> switch (MI.getOpcode()) {
> case AMDGPU::S_SENDMSG:
> case AMDGPU::S_SENDMSGHALT:
> case AMDGPU::S_TTRACEDATA:
> return true;
> + // These DS opcodes don't support GDS.
> + case AMDGPU::DS_NOP:
> + case AMDGPU::DS_PERMUTE_B32:
> + case AMDGPU::DS_BPERMUTE_B32:
> + return false;
> default:
> - // TODO: GDS
> + if (TII.isDS(MI.getOpcode())) {
> + int GDS = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
> + AMDGPU::OpName::gds);
> + if (MI.getOperand(GDS).getImm())
> + return true;
> + }
> return false;
> }
> }
> @@ -145,7 +159,7 @@ GCNHazardRecognizer::getHazardType(SUnit
> checkReadM0Hazards(MI) > 0)
> return NoopHazard;
>
> - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI) &&
> + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI) &&
> checkReadM0Hazards(MI) > 0)
> return NoopHazard;
>
> @@ -199,7 +213,7 @@ unsigned GCNHazardRecognizer::PreEmitNoo
> isSMovRel(MI->getOpcode())))
> return std::max(WaitStates, checkReadM0Hazards(MI));
>
> - if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(*MI))
> + if (ST.hasReadM0SendMsgHazard() && isSendMsgTraceDataOrGDS(TII, *MI))
> return std::max(WaitStates, checkReadM0Hazards(MI));
>
> return WaitStates;
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Jan 16 07:43:53 2019
> @@ -910,6 +910,8 @@ bool SITargetLowering::getTgtMemIntrinsi
> switch (IntrID) {
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap:
> case Intrinsic::amdgcn_ds_fadd:
> case Intrinsic::amdgcn_ds_fmin:
> case Intrinsic::amdgcn_ds_fmax: {
> @@ -937,6 +939,8 @@ bool SITargetLowering::getAddrModeArgume
> switch (II->getIntrinsicID()) {
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap:
> case Intrinsic::amdgcn_ds_fadd:
> case Intrinsic::amdgcn_ds_fmin:
> case Intrinsic::amdgcn_ds_fmax: {
> @@ -5438,6 +5442,63 @@ SDValue SITargetLowering::LowerINTRINSIC
> SDLoc DL(Op);
>
> switch (IntrID) {
> + case Intrinsic::amdgcn_ds_ordered_add:
> + case Intrinsic::amdgcn_ds_ordered_swap: {
> + MemSDNode *M = cast<MemSDNode>(Op);
> + SDValue Chain = M->getOperand(0);
> + SDValue M0 = M->getOperand(2);
> + SDValue Value = M->getOperand(3);
> + unsigned OrderedCountIndex = M->getConstantOperandVal(7);
> + unsigned WaveRelease = M->getConstantOperandVal(8);
> + unsigned WaveDone = M->getConstantOperandVal(9);
> + unsigned ShaderType;
> + unsigned Instruction;
> +
> + switch (IntrID) {
> + case Intrinsic::amdgcn_ds_ordered_add:
> + Instruction = 0;
> + break;
> + case Intrinsic::amdgcn_ds_ordered_swap:
> + Instruction = 1;
> + break;
> + }
> +
> + if (WaveDone && !WaveRelease)
> + report_fatal_error("ds_ordered_count: wave_done requires wave_release");
> +
> + switch (DAG.getMachineFunction().getFunction().getCallingConv()) {
> + case CallingConv::AMDGPU_CS:
> + case CallingConv::AMDGPU_KERNEL:
> + ShaderType = 0;
> + break;
> + case CallingConv::AMDGPU_PS:
> + ShaderType = 1;
> + break;
> + case CallingConv::AMDGPU_VS:
> + ShaderType = 2;
> + break;
> + case CallingConv::AMDGPU_GS:
> + ShaderType = 3;
> + break;
> + default:
> + report_fatal_error("ds_ordered_count unsupported for this calling conv");
> + }
> +
> + unsigned Offset0 = OrderedCountIndex << 2;
> + unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
> + (Instruction << 4);
> + unsigned Offset = Offset0 | (Offset1 << 8);
> +
> + SDValue Ops[] = {
> + Chain,
> + Value,
> + DAG.getTargetConstant(Offset, DL, MVT::i16),
> + copyToM0(DAG, Chain, DL, M0).getValue(1), // Glue
> + };
> + return DAG.getMemIntrinsicNode(AMDGPUISD::DS_ORDERED_COUNT, DL,
> + M->getVTList(), Ops, M->getMemoryVT(),
> + M->getMemOperand());
> + }
> case Intrinsic::amdgcn_atomic_inc:
> case Intrinsic::amdgcn_atomic_dec:
> case Intrinsic::amdgcn_ds_fadd:
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed Jan 16 07:43:53 2019
> @@ -536,10 +536,13 @@ void WaitcntBrackets::updateByEvent(cons
> CurrScore);
> }
> if (Inst.mayStore()) {
> - setExpScore(
> - &Inst, TII, TRI, MRI,
> - AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
> - CurrScore);
> + if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
> + AMDGPU::OpName::data0) != -1) {
> + setExpScore(
> + &Inst, TII, TRI, MRI,
> + AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::data0),
> + CurrScore);
> + }
> if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
> AMDGPU::OpName::data1) != -1) {
> setExpScore(&Inst, TII, TRI, MRI,
> @@ -1093,7 +1096,8 @@ void SIInsertWaitcnts::updateEventWaitcn
> // bracket and the destination operand scores.
> // TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
> if (TII->isDS(Inst) && TII->usesLGKM_CNT(Inst)) {
> - if (TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
> + if (TII->isAlwaysGDS(Inst.getOpcode()) ||
> + TII->hasModifiersSet(Inst, AMDGPU::OpName::gds)) {
> ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
> ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
> } else {
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Jan 16 07:43:53 2019
> @@ -2390,6 +2390,16 @@ bool SIInstrInfo::isSchedulingBoundary(c
> changesVGPRIndexingMode(MI);
> }
>
> +bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
> + return Opcode == AMDGPU::DS_ORDERED_COUNT ||
> + Opcode == AMDGPU::DS_GWS_INIT ||
> + Opcode == AMDGPU::DS_GWS_SEMA_V ||
> + Opcode == AMDGPU::DS_GWS_SEMA_BR ||
> + Opcode == AMDGPU::DS_GWS_SEMA_P ||
> + Opcode == AMDGPU::DS_GWS_SEMA_RELEASE_ALL ||
> + Opcode == AMDGPU::DS_GWS_BARRIER;
> +}
> +
> bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
> unsigned Opcode = MI.getOpcode();
>
> @@ -2403,7 +2413,8 @@ bool SIInstrInfo::hasUnwantedEffectsWhen
> // EXEC = 0, but checking for that case here seems not worth it
> // given the typical code patterns.
> if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
> - Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE)
> + Opcode == AMDGPU::EXP || Opcode == AMDGPU::EXP_DONE ||
> + Opcode == AMDGPU::DS_ORDERED_COUNT)
> return true;
>
> if (MI.isInlineAsm())
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.h Wed Jan 16 07:43:53 2019
> @@ -450,6 +450,8 @@ public:
> return get(Opcode).TSFlags & SIInstrFlags::DS;
> }
>
> + bool isAlwaysGDS(uint16_t Opcode) const;
> +
> static bool isMIMG(const MachineInstr &MI) {
> return MI.getDesc().TSFlags & SIInstrFlags::MIMG;
> }
>
> Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=351351&r1=351350&r2=351351&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Jan 16 07:43:53 2019
> @@ -45,6 +45,11 @@ def SIsbuffer_load : SDNode<"AMDGPUISD::
> [SDNPMayLoad, SDNPMemOperand]
> >;
>
> +def SIds_ordered_count : SDNode<"AMDGPUISD::DS_ORDERED_COUNT",
> + SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i16>]>,
> + [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain, SDNPInGlue]
> +>;
> +
> def SIatomic_inc : SDNode<"AMDGPUISD::ATOMIC_INC", SDTAtomic2,
> [SDNPMayLoad, SDNPMayStore, SDNPMemOperand, SDNPHasChain]
> >;
>
> Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll?rev=351351&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll (added)
> +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.add.ll Wed Jan 16 07:43:53 2019
> @@ -0,0 +1,96 @@
> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
> +define amdgpu_kernel void @ds_ordered_add(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; Below are various modifications of input operands and shader types.
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_counter2:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:776 gds
> +define amdgpu_kernel void @ds_ordered_add_counter2(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 2, i1 true, i1 true)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_nodone:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:260 gds
> +define amdgpu_kernel void @ds_ordered_add_nodone(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 false)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_norelease:
> +; GCN-DAG: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN-DAG: s_mov_b32 m0,
> +; GCN: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:4 gds
> +define amdgpu_kernel void @ds_ordered_add_norelease(i32 addrspace(2)* inreg %gds, i32 addrspace(1)* %out) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 false, i1 false)
> + store i32 %val, i32 addrspace(1)* %out
> + ret void
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_cs:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:772 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_cs float @ds_ordered_add_cs(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_ps:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:1796 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_ps float @ds_ordered_add_ps(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_vs:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:2820 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_vs float @ds_ordered_add_vs(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_add_gs:
> +; GCN: v_mov_b32_e32 v[[INCR:[0-9]+]], 31
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[INCR]] offset:3844 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_gs float @ds_ordered_add_gs(i32 addrspace(2)* inreg %gds) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* %gds, i32 31, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +declare i32 @llvm.amdgcn.ds.ordered.add(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
>
> Added: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll?rev=351351&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll (added)
> +++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll Wed Jan 16 07:43:53 2019
> @@ -0,0 +1,45 @@
> +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %s
> +
> +; FUNC-LABEL: {{^}}ds_ordered_swap:
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
> +; GCN-NEXT: s_waitcnt expcnt(0) lgkmcnt(0)
> +define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value) {
> + %val = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + %r = bitcast i32 %val to float
> + ret float %r
> +}
> +
> +; FUNC-LABEL: {{^}}ds_ordered_swap_conditional:
> +; GCN: v_cmp_ne_u32_e32 vcc, 0, v0
> +; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
> +; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
> +; GCN: s_cbranch_execz [[BB:BB._.]]
> +; GCN: s_mov_b32 m0, s0
> +; VIGFX9-NEXT: s_nop 0
> +; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v0 offset:4868 gds
> +; GCN-NEXT: [[BB]]:
> +; // Wait for expcnt(0) before modifying EXEC
> +; GCN-NEXT: s_waitcnt expcnt(0)
> +; GCN-NEXT: s_or_b64 exec, exec, s[[SAVED]]
> +; GCN-NEXT: s_waitcnt lgkmcnt(0)
> +define amdgpu_cs float @ds_ordered_swap_conditional(i32 addrspace(2)* inreg %gds, i32 %value) {
> +entry:
> + %c = icmp ne i32 %value, 0
> + br i1 %c, label %if-true, label %endif
> +
> +if-true:
> + %val = call i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* %gds, i32 %value, i32 0, i32 0, i1 false, i32 1, i1 true, i1 true)
> + br label %endif
> +
> +endif:
> + %v = phi i32 [ %val, %if-true ], [ undef, %entry ]
> + %r = bitcast i32 %v to float
> + ret float %r
> +}
> +
> +declare i32 @llvm.amdgcn.ds.ordered.swap(i32 addrspace(2)* nocapture, i32, i32, i32, i1, i32, i1, i1)
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list