[llvm] 9b2f353 - AMDGPU/GlobalISel: Select DS GWS intrinsics

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 16 08:25:16 PST 2020


Author: Matt Arsenault
Date: 2020-01-16T11:25:10-05:00
New Revision: 9b2f3532c7ffa664a004714731752733d914e2d4

URL: https://github.com/llvm/llvm-project/commit/9b2f3532c7ffa664a004714731752733d914e2d4
DIFF: https://github.com/llvm/llvm-project/commit/9b2f3532c7ffa664a004714731752733d914e2d4.diff

LOG: AMDGPU/GlobalISel: Select DS GWS intrinsics

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll
    llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
    llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
    llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 873ac9bb0bae..5ab8d2d84792 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1088,6 +1088,115 @@ bool AMDGPUInstructionSelector::selectDSOrderedIntrinsic(
   return Ret;
 }
 
+static unsigned gwsIntrinToOpcode(unsigned IntrID) {
+  switch (IntrID) {
+  case Intrinsic::amdgcn_ds_gws_init:
+    return AMDGPU::DS_GWS_INIT;
+  case Intrinsic::amdgcn_ds_gws_barrier:
+    return AMDGPU::DS_GWS_BARRIER;
+  case Intrinsic::amdgcn_ds_gws_sema_v:
+    return AMDGPU::DS_GWS_SEMA_V;
+  case Intrinsic::amdgcn_ds_gws_sema_br:
+    return AMDGPU::DS_GWS_SEMA_BR;
+  case Intrinsic::amdgcn_ds_gws_sema_p:
+    return AMDGPU::DS_GWS_SEMA_P;
+  case Intrinsic::amdgcn_ds_gws_sema_release_all:
+    return AMDGPU::DS_GWS_SEMA_RELEASE_ALL;
+  default:
+    llvm_unreachable("not a gws intrinsic");
+  }
+}
+
+bool AMDGPUInstructionSelector::selectDSGWSIntrinsic(MachineInstr &MI,
+                                                     Intrinsic::ID IID) const {
+  if (IID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
+      !STI.hasGWSSemaReleaseAll())
+    return false;
+
+  // intrinsic ID, vsrc, offset
+  const bool HasVSrc = MI.getNumOperands() == 3;
+  assert(HasVSrc || MI.getNumOperands() == 2);
+
+  Register BaseOffset = MI.getOperand(HasVSrc ? 2 : 1).getReg();
+  const RegisterBank *OffsetRB = RBI.getRegBank(BaseOffset, *MRI, TRI);
+  if (OffsetRB->getID() != AMDGPU::SGPRRegBankID)
+    return false;
+
+  MachineInstr *OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
+  assert(OffsetDef);
+
+  unsigned ImmOffset;
+
+  MachineBasicBlock *MBB = MI.getParent();
+  const DebugLoc &DL = MI.getDebugLoc();
+
+  MachineInstr *Readfirstlane = nullptr;
+
+  // If we legalized the VGPR input, strip out the readfirstlane to analyze the
+  // incoming offset, in case there's an add of a constant. We'll have to put it
+  // back later.
+  if (OffsetDef->getOpcode() == AMDGPU::V_READFIRSTLANE_B32) {
+    Readfirstlane = OffsetDef;
+    BaseOffset = OffsetDef->getOperand(1).getReg();
+    OffsetDef = getDefIgnoringCopies(BaseOffset, *MRI);
+  }
+
+  if (OffsetDef->getOpcode() == AMDGPU::G_CONSTANT) {
+    // If we have a constant offset, try to use the 0 in m0 as the base.
+    // TODO: Look into changing the default m0 initialization value. If the
+    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
+    // the immediate offset.
+
+    ImmOffset = OffsetDef->getOperand(1).getCImm()->getZExtValue();
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+      .addImm(0);
+  } else {
+    std::tie(BaseOffset, ImmOffset, OffsetDef)
+      = AMDGPU::getBaseWithConstantOffset(*MRI, BaseOffset);
+
+    if (Readfirstlane) {
+      // We have the constant offset now, so put the readfirstlane back on the
+      // variable component.
+      if (!RBI.constrainGenericRegister(BaseOffset, AMDGPU::VGPR_32RegClass, *MRI))
+        return false;
+
+      Readfirstlane->getOperand(1).setReg(BaseOffset);
+      BaseOffset = Readfirstlane->getOperand(0).getReg();
+    } else {
+      if (!RBI.constrainGenericRegister(BaseOffset,
+                                        AMDGPU::SReg_32RegClass, *MRI))
+        return false;
+    }
+
+    Register M0Base = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::S_LSHL_B32), M0Base)
+      .addReg(BaseOffset)
+      .addImm(16);
+
+    BuildMI(*MBB, &MI, DL, TII.get(AMDGPU::COPY), AMDGPU::M0)
+      .addReg(M0Base);
+  }
+
+  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
+  // offset field) % 64. Some versions of the programming guide omit the m0
+  // part, or claim it's from offset 0.
+  auto MIB = BuildMI(*MBB, &MI, DL, TII.get(gwsIntrinToOpcode(IID)));
+
+  if (HasVSrc) {
+    Register VSrc = MI.getOperand(1).getReg();
+    MIB.addReg(VSrc);
+    if (!RBI.constrainGenericRegister(VSrc, AMDGPU::VGPR_32RegClass, *MRI))
+      return false;
+  }
+
+  MIB.addImm(ImmOffset)
+     .addImm(-1) // $gds
+     .cloneMemRefs(MI);
+
+  MI.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
     MachineInstr &I) const {
   MachineBasicBlock *BB = I.getParent();
@@ -1114,6 +1223,13 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC_W_SIDE_EFFECTS(
   case Intrinsic::amdgcn_ds_ordered_add:
   case Intrinsic::amdgcn_ds_ordered_swap:
     return selectDSOrderedIntrinsic(I, IntrinsicID);
+  case Intrinsic::amdgcn_ds_gws_init:
+  case Intrinsic::amdgcn_ds_gws_barrier:
+  case Intrinsic::amdgcn_ds_gws_sema_v:
+  case Intrinsic::amdgcn_ds_gws_sema_br:
+  case Intrinsic::amdgcn_ds_gws_sema_p:
+  case Intrinsic::amdgcn_ds_gws_sema_release_all:
+    return selectDSGWSIntrinsic(I, IntrinsicID);
   default:
     return selectImpl(I, *CoverageInfo);
   }

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index 38ca7fd4104b..6f5268629bee 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -101,6 +101,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
 
   bool selectStoreIntrinsic(MachineInstr &MI, bool IsFormat) const;
   bool selectDSOrderedIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
+  bool selectDSGWSIntrinsic(MachineInstr &MI, Intrinsic::ID IID) const;
 
   bool selectG_INTRINSIC_W_SIDE_EFFECTS(MachineInstr &I) const;
   int getS_CMPOpcode(CmpInst::Predicate P, unsigned Size) const;

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
new file mode 100644
index 000000000000..17293926f035
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.barrier.ll
@@ -0,0 +1,9 @@
+; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.barrier.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.barrier.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL,GFX10 %S/../llvm.amdgcn.ds.gws.barrier.ll
+
+; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
+; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=postrapseudos -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.barrier.ll | FileCheck -enable-var-scope -check-prefix=MIR %S/../llvm.amdgcn.ds.gws.barrier.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
new file mode 100644
index 000000000000..942c991bbd46
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.init.ll
@@ -0,0 +1,5 @@
+; XUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.init.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.init.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-GISEL %S/../llvm.amdgcn.ds.gws.init.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
new file mode 100644
index 000000000000..a3908c51688d
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.br.ll
@@ -0,0 +1,5 @@
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.br.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.br.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.br.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll
new file mode 100644
index 000000000000..550a7312cbe1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.release.all.ll
@@ -0,0 +1,8 @@
+; RUN: not llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll 2>&1 | FileCheck -enable-var-scope -check-prefix=GFX6ERR-GISEL %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.release.all.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.release.all.ll
+
+; GFX6ERR-GISEL: LLVM ERROR: cannot select: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.ds.gws.sema.release.all), %{{[0-9]+}}:sgpr(s32) :: (store 4 into custom "GWSResource") (in function: gws_sema_release_all_offset0)
+

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll
new file mode 100644
index 000000000000..fe38b56ea254
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.gws.sema.v.ll
@@ -0,0 +1,5 @@
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP,GFX8 %S/../llvm.amdgcn.ds.gws.sema.v.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %S/../llvm.amdgcn.ds.gws.sema.v.ll
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - -verify-machineinstrs < %S/../llvm.amdgcn.ds.gws.sema.v.ll | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %S/../llvm.amdgcn.ds.gws.sema.v.ll

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
index a781fb500601..ba8acd8a9221 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,GFX10 %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG,GFX10 %s
 
 ; Make sure the op is emitted bundled with a waitcnt with and without the retry loop, and the bundle is not removed by ExpandPostRAPseudos.
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -stop-after=postrapseudos -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=MIR %s
@@ -51,8 +51,13 @@ define amdgpu_kernel void @gws_barrier_offset63(i32 %val) #0 {
 ; FIXME: Should be able to shift directly into m0
 ; GCN-LABEL: {{^}}gws_barrier_sgpr_offset:
 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
+
+
 ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
 ; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
 define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
@@ -63,8 +68,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset(i32 %val, i32 %offset) #0 {
 ; Variable offset in SGPR with constant add
 ; GCN-LABEL: {{^}}gws_barrier_sgpr_offset_add1:
 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
 ; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:1 gds{{$}}
 define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
@@ -76,8 +85,12 @@ define amdgpu_kernel void @gws_barrier_sgpr_offset_add1(i32 %val, i32 %offset.ba
 ; GCN-LABEL: {{^}}gws_barrier_vgpr_offset:
 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
 ; NOLOOP: ds_gws_barrier [[GWS_VAL]] gds{{$}}
 define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
@@ -90,8 +103,12 @@ define amdgpu_kernel void @gws_barrier_vgpr_offset(i32 %val) #0 {
 ; GCN-LABEL: {{^}}gws_barrier_vgpr_offset_add:
 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], [[BAR_NUM]]
 ; NOLOOP: ds_gws_barrier [[GWS_VAL]] offset:3 gds{{$}}
 define amdgpu_kernel void @gws_barrier_vgpr_offset_add(i32 %val) #0 {

diff  --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
index aa490f15d97f..fe289fe06f7d 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
 ; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,LOOP %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
-; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
+; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -asm-verbose=0 -o - -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,NOLOOP,NOLOOP-SDAG %s
 
 ; Minimum offset
 ; GCN-LABEL: {{^}}gws_init_offset0:
@@ -47,8 +47,12 @@ define amdgpu_kernel void @gws_init_offset63(i32 %val) #0 {
 ; FIXME: Should be able to shift directly into m0
 ; GCN-LABEL: {{^}}gws_init_sgpr_offset:
 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
 ; NOLOOP: ds_gws_init [[GWS_VAL]] gds{{$}}
 define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
@@ -59,8 +63,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset(i32 %val, i32 %offset) #0 {
 ; Variable offset in SGPR with constant add
 ; GCN-LABEL: {{^}}gws_init_sgpr_offset_add1:
 ; NOLOOP-DAG: s_load_dwordx2 s{{\[}}[[BAR_NUM:[0-9]+]]:[[OFFSET:[0-9]+]]{{\]}}
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], s[[OFFSET]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, s[[OFFSET]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 [[GWS_VAL:v[0-9]+]], s[[BAR_NUM]]
 ; NOLOOP: ds_gws_init [[GWS_VAL]] offset:1 gds{{$}}
 define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base) #0 {
@@ -72,8 +80,12 @@ define amdgpu_kernel void @gws_init_sgpr_offset_add1(i32 %val, i32 %offset.base)
 ; GCN-LABEL: {{^}}gws_init_vgpr_offset:
 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_init v0 gds{{$}}
 define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
@@ -86,8 +98,12 @@ define amdgpu_kernel void @gws_init_vgpr_offset(i32 %val) #0 {
 ; GCN-LABEL: {{^}}gws_init_vgpr_offset_add:
 ; NOLOOP-DAG: s_load_dword [[BAR_NUM:s[0-9]+]]
 ; NOLOOP-DAG: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v0
-; NOLOOP-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
-; NOLOOP-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-SDAG-DAG: s_lshl_b32 [[SHL:s[0-9]+]], [[READLANE]], 16
+; NOLOOP-SDAG-DAG: s_mov_b32 m0, [[SHL]]{{$}}
+
+; NOLOOP-GISEL-DAG: s_lshl_b32 m0, [[READLANE]], 16
+
 ; NOLOOP-DAG: v_mov_b32_e32 v0, [[BAR_NUM]]
 ; NOLOOP: ds_gws_init v0 offset:3 gds{{$}}
 define amdgpu_kernel void @gws_init_vgpr_offset_add(i32 %val) #0 {


        


More information about the llvm-commits mailing list