[llvm] 2038014 - AMDGPU/GlobalISel: Select llvm.amdgcn.ds.ordered.{add|swap}

Mon Jan 13 10:09:44 PST 2020

Author: Matt Arsenault
Date: 2020-01-13T13:09:38-05:00
New Revision: 203801425d222555fa2617fff19ecd861525429f

URL: https://github.com/llvm/llvm-project/commit/203801425d222555fa2617fff19ecd861525429f
DIFF: https://github.com/llvm/llvm-project/commit/203801425d222555fa2617fff19ecd861525429f.diff

LOG: AMDGPU/GlobalISel: Select llvm.amdgcn.ds.ordered.{add|swap}

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 5dacc0993fc9..c0ea35817ec8 100644

--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1022,6 +1022,90 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
   return constrainSelectedInstRegOperands(*MIB, TII, TRI, RBI);
 }
 
+static unsigned getDSShaderTypeValue(const MachineFunction &MF) { // Map MF's calling convention to a shader-type value in 0..3.
+  switch (MF.getFunction().getCallingConv()) {
+  case CallingConv::AMDGPU_PS:
+    return 1;
+  case CallingConv::AMDGPU_VS:
+    return 2;
+  case CallingConv::AMDGPU_GS:
+    return 3;
+  case CallingConv::AMDGPU_HS: // HS, LS and ES share the fatal-error path below.
+  case CallingConv::AMDGPU_LS:
+  case CallingConv::AMDGPU_ES:
+    report_fatal_error("ds_ordered_count unsupported for this calling conv");
+  case CallingConv::AMDGPU_CS: // The listed compute/callable conventions share the default result.
+  case CallingConv::AMDGPU_KERNEL:
+  case CallingConv::C:
+  case CallingConv::Fast:
+  default:
+    // Assume other calling conventions are various compute callable functions
+    return 0;
+  }
+}
+
+bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
+  MachineInstr &MI, Intrinsic::ID IntrID) const { // Replace MI with a copy into M0 plus a DS_ORDERED_COUNT carrying a packed offset immediate.
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineFunction *MF = MBB->getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  unsigned IndexOperand = MI.getOperand(7).getImm(); // Operands 7, 8 and 9 are read as immediates.
+  bool WaveRelease = MI.getOperand(8).getImm() != 0;
+  bool WaveDone = MI.getOperand(9).getImm() != 0;
+
+  if (WaveDone && !WaveRelease)
+    report_fatal_error("ds_ordered_count: wave_done requires wave_release");
+
+  unsigned OrderedCountIndex = IndexOperand & 0x3f; // Low 6 bits hold the ordered-count index.
+  IndexOperand &= ~0x3f; // Clear consumed bits so any leftovers can be rejected below.
+  unsigned CountDw = 0;
+
+  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10) {
+    CountDw = (IndexOperand >> 24) & 0xf; // On GFX10 and later, bits 24..27 hold the dword count.
+    IndexOperand &= ~(0xf << 24);
+
+    if (CountDw < 1 || CountDw > 4) {
+      report_fatal_error(
+        "ds_ordered_count: dword count must be between 1 and 4");
+    }
+  }
+
+  if (IndexOperand) // Any bit still set was not consumed above and is treated as invalid.
+    report_fatal_error("ds_ordered_count: bad index operand");
+
+  unsigned Instruction = IntrID == Intrinsic::amdgcn_ds_ordered_add ? 0 : 1; // 0 for ds_ordered_add, 1 for any other ID.
+  unsigned ShaderType = getDSShaderTypeValue(*MF);
+
+  unsigned Offset0 = OrderedCountIndex << 2; // Index scaled by 4; at most 0xfc, so it fits in the low byte.
+  unsigned Offset1 = WaveRelease | (WaveDone << 1) | (ShaderType << 2) |
+                     (Instruction << 4); // Bit 0 release, bit 1 done, bits 2..3 shader type, bit 4 instruction.
+
+  if (STI.getGeneration() >= AMDGPUSubtarget::GFX10)
+    Offset1 |= (CountDw - 1) << 6; // Dword count is stored minus one, starting at bit 6.
+
+  unsigned Offset = Offset0 | (Offset1 << 8); // Offset0 occupies bits 0..7, Offset1 starts at bit 8.
+
+  Register M0Val = MI.getOperand(2).getReg();
+  BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0) // Copy operand 2 into M0 ahead of the DS instruction.
+    .addReg(M0Val);
+
+  Register DstReg = MI.getOperand(0).getReg();
+  Register ValReg = MI.getOperand(3).getReg();
+  MachineInstrBuilder DS =
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::DS_ORDERED_COUNT), DstReg)
+      .addReg(ValReg)
+      .addImm(Offset)
+      .cloneMemRefs(MI); // Carry over MI's memory operands to the new instruction.
+
+  if (!RBI.constrainGenericRegister(M0Val, AMDGPU::SReg_32RegClass, *MRI)) // NOTE(review): on failure this returns with the new instructions already inserted and MI not erased.
+    return false;
+
+  bool Ret = constrainSelectedInstRegOperands(*DS, TII, TRI, RBI);
+  MI.eraseFromParent(); // The original instruction is removed regardless of Ret.
+  return Ret;
+}
+
 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
@@ -1077,6 +1161,9 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     return selectStoreIntrinsic(I, false);
   case Intrinsic::amdgcn_raw_buffer_store_format:
     return selectStoreIntrinsic(I, true);
+  case Intrinsic::amdgcn_ds_ordered_add:
+  case Intrinsic::amdgcn_ds_ordered_swap:
+    return selectDSOrderedIntrinsic(I, IntrinsicID);
   default:
     return selectImpl(I, *CoverageInfo);
   }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index d884afbe7707..38ca7fd4104b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -100,6 +100,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   splitBufferOffsets(MachineIRBuilder &B, Register OrigOffset) const;
 
   bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
+  bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
 
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
   int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
new file mode 100644
index 000000000000..9a287359d4db
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.gfx10.ll
@@ -0,0 +1 @@
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -show-mc-encoding < %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll | FileCheck -check-prefixes=GCN %S/../llvm.amdgcn.ds.ordered.add.gfx10.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
new file mode 100644
index 000000000000..8cba08f016da
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll
@@ -0,0 +1,5 @@
+; FIXME: Broken SI run line
+; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
new file mode 100644
index 000000000000..28c2c7a4e9bf
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll
@@ -0,0 +1,5 @@
+; FIXME: Broken SI run line
+; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll
+; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll