[llvm] r364833 - AMDGPU/GlobalISel: Legalize workitem ID intrinsics

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 1 11:45:36 PDT 2019


Author: arsenm
Date: Mon Jul  1 11:45:36 2019
New Revision: 364833

URL: http://llvm.org/viewvc/llvm-project?rev=364833&view=rev
Log:
AMDGPU/GlobalISel: Legalize workitem ID intrinsics

Tests don't cover the masked input path since non-kernel arguments
aren't lowered yet.

Test is copied directly from the existing test, with 2 additions.

Added:
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
Modified:
    llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
    llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp?rev=364833&r1=364832&r2=364833&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp Mon Jul  1 11:45:36 2019
@@ -166,6 +166,38 @@ static Register findFirstFreeSGPR(CCStat
   llvm_unreachable("Cannot allocate sgpr");
 }
 
+static void allocateSpecialEntryInputVGPRs(CCState &CCInfo,
+                                           MachineFunction &MF,
+                                           const SIRegisterInfo &TRI,
+                                           SIMachineFunctionInfo &Info) {
+  const LLT S32 = LLT::scalar(32);
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  if (Info.hasWorkItemIDX()) {
+    Register Reg = AMDGPU::VGPR0;
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+    CCInfo.AllocateReg(Reg);
+    Info.setWorkItemIDX(ArgDescriptor::createRegister(Reg));
+  }
+
+  if (Info.hasWorkItemIDY()) {
+    Register Reg = AMDGPU::VGPR1;
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+    CCInfo.AllocateReg(Reg);
+    Info.setWorkItemIDY(ArgDescriptor::createRegister(Reg));
+  }
+
+  if (Info.hasWorkItemIDZ()) {
+    Register Reg = AMDGPU::VGPR2;
+    MRI.setType(MF.addLiveIn(Reg, &AMDGPU::VGPR_32RegClass), S32);
+
+    CCInfo.AllocateReg(Reg);
+    Info.setWorkItemIDZ(ArgDescriptor::createRegister(Reg));
+  }
+}
+
 static void allocateSystemSGPRs(CCState &CCInfo,
                                 MachineFunction &MF,
                                 SIMachineFunctionInfo &Info,
@@ -288,6 +320,7 @@ bool AMDGPUCallLowering::lowerFormalArgu
       ++i;
     }
 
+    allocateSpecialEntryInputVGPRs(CCInfo, MF, *TRI, *Info);
     allocateSystemSGPRs(CCInfo, MF, *Info, F.getCallingConv(), IsShader);
     return true;
   }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=364833&r1=364832&r2=364833&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Mon Jul  1 11:45:36 2019
@@ -23,6 +23,8 @@
 #include "llvm/IR/Type.h"
 #include "llvm/Support/Debug.h"
 
+#define DEBUG_TYPE "amdgpu-legalinfo"
+
 using namespace llvm;
 using namespace LegalizeActions;
 using namespace LegalizeMutations;
@@ -1059,6 +1061,79 @@ static MachineInstr *verifyCFIntrinsic(M
     UseMI.getOpcode() == AMDGPU::G_BRCOND ? &UseMI : nullptr;
 }
 
+Register AMDGPULegalizerInfo::getLiveInRegister(MachineRegisterInfo &MRI,
+                                                Register Reg, LLT Ty) const {
+  Register LiveIn = MRI.getLiveInVirtReg(Reg);
+  if (LiveIn)
+    return LiveIn;
+
+  Register NewReg = MRI.createGenericVirtualRegister(Ty);
+  MRI.addLiveIn(Reg, NewReg);
+  return NewReg;
+}
+
+bool AMDGPULegalizerInfo::loadInputValue(Register DstReg, MachineIRBuilder &B,
+                                         const ArgDescriptor *Arg) const {
+  if (!Arg->isRegister())
+    return false; // TODO: Handle these
+
+  assert(Arg->getRegister() != 0);
+  assert(Arg->getRegister().isPhysical());
+
+  MachineRegisterInfo &MRI = *B.getMRI();
+
+  LLT Ty = MRI.getType(DstReg);
+  Register LiveIn = getLiveInRegister(MRI, Arg->getRegister(), Ty);
+
+  if (Arg->isMasked()) {
+    // TODO: Should we try to emit this once in the entry block?
+    const LLT S32 = LLT::scalar(32);
+    const unsigned Mask = Arg->getMask();
+    const unsigned Shift = countTrailingZeros<unsigned>(Mask);
+
+    auto ShiftAmt = B.buildConstant(S32, Shift);
+    auto LShr = B.buildLShr(S32, LiveIn, ShiftAmt);
+    B.buildAnd(DstReg, LShr, B.buildConstant(S32, Mask >> Shift));
+  } else
+    B.buildCopy(DstReg, LiveIn);
+
+  // Insert the argument copy if it doens't already exist.
+  // FIXME: It seems EmitLiveInCopies isn't called anywhere?
+  if (!MRI.getVRegDef(LiveIn)) {
+    MachineBasicBlock &EntryMBB = B.getMF().front();
+    EntryMBB.addLiveIn(Arg->getRegister());
+    B.setInsertPt(EntryMBB, EntryMBB.begin());
+    B.buildCopy(LiveIn, Arg->getRegister());
+  }
+
+  return true;
+}
+
+bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
+  MachineInstr &MI,
+  MachineRegisterInfo &MRI,
+  MachineIRBuilder &B,
+  AMDGPUFunctionArgInfo::PreloadedValue ArgType) const {
+  B.setInstr(MI);
+
+  const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+
+  const ArgDescriptor *Arg;
+  const TargetRegisterClass *RC;
+  std::tie(Arg, RC) = MFI->getPreloadedValue(ArgType);
+  if (!Arg) {
+    LLVM_DEBUG(dbgs() << "Required arg register missing\n");
+    return false;
+  }
+
+  if (loadInputValue(MI.getOperand(0).getReg(), B, Arg)) {
+    MI.eraseFromParent();
+    return true;
+  }
+
+  return false;
+}
+
 bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
                                             MachineRegisterInfo &MRI,
                                             MachineIRBuilder &B) const {
@@ -1104,6 +1179,15 @@ bool AMDGPULegalizerInfo::legalizeIntrin
 
     return false;
   }
+  case Intrinsic::amdgcn_workitem_id_x:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKITEM_ID_X);
+  case Intrinsic::amdgcn_workitem_id_y:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Y);
+  case Intrinsic::amdgcn_workitem_id_z:
+    return legalizePreloadedArgIntrin(MI, MRI, B,
+                                      AMDGPUFunctionArgInfo::WORKITEM_ID_Z);
   default:
     return true;
   }

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h?rev=364833&r1=364832&r2=364833&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h Mon Jul  1 11:45:36 2019
@@ -15,6 +15,7 @@
 #define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINELEGALIZER_H
 
 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "AMDGPUArgumentUsageInfo.h"
 
 namespace llvm {
 
@@ -47,6 +48,15 @@ public:
   bool legalizeITOFP(MachineInstr &MI, MachineRegisterInfo &MRI,
                      MachineIRBuilder &MIRBuilder, bool Signed) const;
 
+  Register getLiveInRegister(MachineRegisterInfo &MRI,
+                             Register Reg, LLT Ty) const;
+
+  bool loadInputValue(Register DstReg, MachineIRBuilder &B,
+                      const ArgDescriptor *Arg) const;
+  bool legalizePreloadedArgIntrin(
+    MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
+    AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+
   bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIRBuilder) const override;
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll?rev=364833&r1=364832&r2=364833&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_kernel-system-sgprs.ll Mon Jul  1 11:45:36 2019
@@ -3,7 +3,8 @@
 ; HSA-LABEL: name: default_kernel
 ; HSA: liveins:
 ; HSA-NEXT: - { reg: '$sgpr0_sgpr1_sgpr2_sgpr3', virtual-reg: '%0' }
-; HSA-NEXT: - { reg: '$sgpr4', virtual-reg: '%1' }
+; HSA-NEXT: - { reg: '$vgpr0', virtual-reg: '%1' }
+; HSA-NEXT: - { reg: '$sgpr4', virtual-reg: '%2' }
 ; HSA-NEXT: frameInfo:
 define amdgpu_kernel void @default_kernel() {
   ret void

Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll?rev=364833&r1=364832&r2=364833&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-fast-math-flags.ll Mon Jul  1 11:45:36 2019
@@ -20,7 +20,7 @@ define amdgpu_kernel void @fma_fast(floa
 
 ; Check flags are preserved for an arbitrarry target intrinsic
 ; CHECK-LABEL: name: rcp_nsz
-; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %8(s32)
+; CHECK: = nsz G_INTRINSIC intrinsic(@llvm.amdgcn.rcp), %{{[0-9]+}}(s32)
 define amdgpu_kernel void @rcp_nsz(float %arg0) {
   %res = call nsz float @llvm.amdgcn.rcp.f32 (float %arg0)
   store float %res, float addrspace(1)* undef

Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll?rev=364833&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll Mon Jul  1 11:45:36 2019
@@ -0,0 +1,92 @@
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mattr=-code-object-v3 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=CI-HSA  %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mattr=-code-object-v3 -mcpu=carrizo -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=CO-V2 -check-prefix=VI-HSA  %s
+; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=SI-MESA %s
+; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=ALL -check-prefix=MESA -check-prefix=VI-MESA %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-code-object-v3 -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,SI-MESA %s
+; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=-code-object-v3 -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,CO-V2,VI-MESA %s
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+
+; MESA: .section .AMDGPU.config
+; MESA: .long 47180
+; MESA-NEXT: .long 132{{$}}
+
+; ALL-LABEL {{^}}test_workitem_id_x:
+; CO-V2: enable_vgpr_workitem_id = 0
+
+; ALL-NOT: v0
+; ALL: {{buffer|flat}}_store_dword {{.*}}v0
+define amdgpu_kernel void @test_workitem_id_x(i32 addrspace(1)* %out) #1 {
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  store i32 %id, i32 addrspace(1)* %out
+  ret void
+}
+
+; MESA: .section .AMDGPU.config
+; MESA: .long 47180
+; MESA-NEXT: .long 2180{{$}}
+
+; ALL-LABEL {{^}}test_workitem_id_y:
+; CO-V2: enable_vgpr_workitem_id = 1
+
+; ALL-NOT: v1
+; ALL: {{buffer|flat}}_store_dword {{.*}}v1
+define amdgpu_kernel void @test_workitem_id_y(i32 addrspace(1)* %out) #1 {
+  %id = call i32 @llvm.amdgcn.workitem.id.y()
+  store i32 %id, i32 addrspace(1)* %out
+  ret void
+}
+
+; MESA: .section .AMDGPU.config
+; MESA: .long 47180
+; MESA-NEXT: .long 4228{{$}}
+
+; ALL-LABEL {{^}}test_workitem_id_z:
+; CO-V2: enable_vgpr_workitem_id = 2
+
+; ALL-NOT: v2
+; ALL: {{buffer|flat}}_store_dword {{.*}}v2
+define amdgpu_kernel void @test_workitem_id_z(i32 addrspace(1)* %out) #1 {
+  %id = call i32 @llvm.amdgcn.workitem.id.z()
+  store i32 %id, i32 addrspace(1)* %out
+  ret void
+}
+
+; ALL-LABEL: {{^}}test_workitem_id_x_usex2:
+; ALL-NOT: v0
+; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
+; ALL-NOT: v0
+; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
+define amdgpu_kernel void @test_workitem_id_x_usex2(i32 addrspace(1)* %out) #1 {
+  %id0 = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id0, i32 addrspace(1)* %out
+
+  %id1 = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id1, i32 addrspace(1)* %out
+  ret void
+}
+
+; ALL-LABEL: {{^}}test_workitem_id_x_use_outside_entry:
+; ALL-NOT: v0
+; ALL: flat_store_dword
+; ALL-NOT: v0
+; ALL: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
+define amdgpu_kernel void @test_workitem_id_x_use_outside_entry(i32 addrspace(1)* %out, i32 %arg) #1 {
+bb0:
+  store volatile i32 0, i32 addrspace(1)* %out
+  %cond = icmp eq i32 %arg, 0
+  br i1 %cond, label %bb1, label %bb2
+
+bb1:
+  %id = call i32 @llvm.amdgcn.workitem.id.x()
+  store volatile i32 %id, i32 addrspace(1)* %out
+  br label %bb2
+
+bb2:
+  ret void
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }




More information about the llvm-commits mailing list