[llvm] r364835 - AMDGPU/GlobalISel: Lower kernarg segment ptr intrinsics
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 1 11:49:01 PDT 2019
Author: arsenm
Date: Mon Jul 1 11:49:01 2019
New Revision: 364835
URL: http://llvm.org/viewvc/llvm-project?rev=364835&view=rev
Log:
AMDGPU/GlobalISel: Lower kernarg segment ptr intrinsics
Added:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
Removed:
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.kernarg.segment.ptr.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp?rev=364835&r1=364834&r2=364835&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp Mon Jul 1 11:49:01 2019
@@ -383,26 +383,6 @@ bool AMDGPUInstructionSelector::selectG_
case Intrinsic::minnum:
case Intrinsic::amdgcn_cvt_pkrtz:
return selectImpl(I, CoverageInfo);
-
- case Intrinsic::amdgcn_kernarg_segment_ptr: {
- MachineFunction *MF = I.getParent()->getParent();
- MachineRegisterInfo &MRI = MF->getRegInfo();
- const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- const ArgDescriptor *InputPtrReg;
- const TargetRegisterClass *RC;
- const DebugLoc &DL = I.getDebugLoc();
-
- std::tie(InputPtrReg, RC)
- = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
- if (!InputPtrReg)
- report_fatal_error("missing kernarg segment ptr");
-
- BuildMI(*I.getParent(), &I, DL, TII.get(AMDGPU::COPY))
- .add(I.getOperand(0))
- .addReg(MRI.getLiveInVirtReg(InputPtrReg->getRegister()));
- I.eraseFromParent();
- return true;
- }
}
return false;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp?rev=364835&r1=364834&r2=364835&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp Mon Jul 1 11:49:01 2019
@@ -82,8 +82,9 @@ static LegalityPredicate numElementsNotE
};
}
-AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST,
- const GCNTargetMachine &TM) {
+AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
+ const GCNTargetMachine &TM)
+ : ST(ST_) {
using namespace TargetOpcode;
auto GetAddrSpacePtr = [&TM](unsigned AS) {
@@ -460,7 +461,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
[](const LegalityQuery &Query) {
return std::make_pair(0, LLT::scalar(32));
})
- .fewerElementsIf([=, &ST](const LegalityQuery &Query) {
+ .fewerElementsIf([=](const LegalityQuery &Query) {
unsigned MemSize = Query.MMODescrs[0].SizeInBits;
return (MemSize == 96) &&
Query.Types[0].isVector() &&
@@ -469,7 +470,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo
[=](const LegalityQuery &Query) {
return std::make_pair(0, V2S32);
})
- .legalIf([=, &ST](const LegalityQuery &Query) {
+ .legalIf([=](const LegalityQuery &Query) {
const LLT &Ty0 = Query.Types[0];
unsigned Size = Ty0.getSizeInBits();
@@ -1134,6 +1135,40 @@ bool AMDGPULegalizerInfo::legalizePreloa
return false;
}
+bool AMDGPULegalizerInfo::legalizeImplicitArgPtr(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const {
+ const SIMachineFunctionInfo *MFI = B.getMF().getInfo<SIMachineFunctionInfo>();
+ if (!MFI->isEntryFunction()) {
+ return legalizePreloadedArgIntrin(MI, MRI, B,
+ AMDGPUFunctionArgInfo::IMPLICIT_ARG_PTR);
+ }
+
+ B.setInstr(MI);
+
+ uint64_t Offset =
+ ST.getTargetLowering()->getImplicitParameterOffset(
+ B.getMF(), AMDGPUTargetLowering::FIRST_IMPLICIT);
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT IdxTy = LLT::scalar(DstTy.getSizeInBits());
+
+ const ArgDescriptor *Arg;
+ const TargetRegisterClass *RC;
+ std::tie(Arg, RC)
+ = MFI->getPreloadedValue(AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ if (!Arg)
+ return false;
+
+ Register KernargPtrReg = MRI.createGenericVirtualRegister(DstTy);
+ if (!loadInputValue(KernargPtrReg, B, Arg))
+ return false;
+
+ B.buildGEP(DstReg, KernargPtrReg, B.buildConstant(IdxTy, Offset).getReg(0));
+ MI.eraseFromParent();
+ return true;
+}
+
bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
MachineRegisterInfo &MRI,
MachineIRBuilder &B) const {
@@ -1179,6 +1214,11 @@ bool AMDGPULegalizerInfo::legalizeIntrin
return false;
}
+ case Intrinsic::amdgcn_kernarg_segment_ptr:
+ return legalizePreloadedArgIntrin(
+ MI, MRI, B, AMDGPUFunctionArgInfo::KERNARG_SEGMENT_PTR);
+ case Intrinsic::amdgcn_implicitarg_ptr:
+ return legalizeImplicitArgPtr(MI, MRI, B);
case Intrinsic::amdgcn_workitem_id_x:
return legalizePreloadedArgIntrin(MI, MRI, B,
AMDGPUFunctionArgInfo::WORKITEM_ID_X);
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h?rev=364835&r1=364834&r2=364835&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPULegalizerInfo.h Mon Jul 1 11:49:01 2019
@@ -25,6 +25,8 @@ class GCNSubtarget;
/// This class provides the information for the target register banks.
class AMDGPULegalizerInfo : public LegalizerInfo {
+ const GCNSubtarget &ST;
+
public:
AMDGPULegalizerInfo(const GCNSubtarget &ST,
const GCNTargetMachine &TM);
@@ -57,6 +59,8 @@ public:
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
+ bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B) const;
bool legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder) const override;
Removed: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.kernarg.segment.ptr.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.kernarg.segment.ptr.mir?rev=364834&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.kernarg.segment.ptr.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.kernarg.segment.ptr.mir (removed)
@@ -1,19 +0,0 @@
-# XFAIL: *
-# RUN: llc -march=amdgcn -run-pass=instruction-select -verify-machineinstrs -global-isel %s -o - | FileCheck %s -check-prefixes=GCN
-
-# FIXME: This requires additional context for what input registers are special inputs not present in MIR.
-
----
-
-name: kernarg_segment_Ptr
-legalized: true
-regBankSelected: true
-
-body: |
- bb.0:
- %0:vgpr(p4) = G_INTRINSIC intrinsic(@llvm.amdgcn.kernarg.segment.ptr)
- %1:sgpr(s32) = G_LOAD %0 :: (load 4)
- %2:vgpr(p1) = G_IMPLICIT_DEF
- G_STORE %1, %2 :: (store 4)
-...
----
Added: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll?rev=364835&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.kernarg.segment.ptr.ll Mon Jul 1 11:49:01 2019
@@ -0,0 +1,125 @@
+; RUN: llc -global-isel -mtriple=amdgcn--amdhsa -mattr=-code-object-v3 -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
+; RUN: llc -global-isel -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s
+
+; ALL-LABEL: {{^}}test:
+; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 8
+; HSA: kernarg_segment_alignment = 4
+
+; CO-V2: s_load_dword s{{[0-9]+}}, s[4:5], 0xa
+
+; OS-UNKNOWN: s_load_dword s{{[0-9]+}}, s[0:1], 0xa
+define amdgpu_kernel void @test(i32 addrspace(1)* %out) #1 {
+ %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
+ %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(4)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; ALL-LABEL: {{^}}test_implicit:
+; HSA: kernarg_segment_byte_size = 8
+; OS-MESA3D: kernarg_segment_byte_size = 24
+; CO-V2: kernarg_segment_alignment = 4
+
+; 10 + 9 (36 prepended implicit bytes) + 2(out pointer) = 21 = 0x15
+
+; OS-UNKNOWN: s_add_u32 s[[LO:[0-9]+]], s0, 44
+; OS-UNKNOWN-NEXT: s_addc_u32 s[[HI:[0-9]+]], s1, 0
+; OS-UNKNOWN-DAG: s_load_dword s{{[0-9]+}}, s{{\[}}[[LO]]:[[HI]]{{\]}}, 0xa
+define amdgpu_kernel void @test_implicit(i32 addrspace(1)* %out) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %header.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(4)* %gep
+ store i32 %value, i32 addrspace(1)* %out
+ ret void
+}
+
+; ALL-LABEL: {{^}}test_implicit_alignment:
+; HSA: kernarg_segment_byte_size = 12
+; OS-MESA3D: kernarg_segment_byte_size = 28
+; CO-V2: kernarg_segment_alignment = 4
+
+
+; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
+; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
+; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
+; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
+; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
+define amdgpu_kernel void @test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #1 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load i32, i32 addrspace(4)* %arg.ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; ALL-LABEL: {{^}}opencl_test_implicit_alignment
+; HSA: kernarg_segment_byte_size = 64
+; OS-MESA3D: kernarg_segment_byte_size = 28
+; CO-V2: kernarg_segment_alignment = 4
+
+
+; OS-UNKNOWN: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xc
+; HSA: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x4
+; OS-MESA3D: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x3
+; ALL: v_mov_b32_e32 [[V_VAL:v[0-9]+]], [[VAL]]
+; ALL: flat_store_dword v[{{[0-9]+:[0-9]+}}], [[V_VAL]]
+define amdgpu_kernel void @opencl_test_implicit_alignment(i32 addrspace(1)* %out, <2 x i8> %in) #2 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load i32, i32 addrspace(4)* %arg.ptr
+ store i32 %val, i32 addrspace(1)* %out
+ ret void
+}
+
+; ALL-LABEL: {{^}}test_no_kernargs:
+; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
+; HSA: kernarg_segment_byte_size = 0
+; OS-MESA3D: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+
+; HSA: s_load_dword s{{[0-9]+}}, s[4:5]
+define amdgpu_kernel void @test_no_kernargs() #1 {
+ %kernarg.segment.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr()
+ %header.ptr = bitcast i8 addrspace(4)* %kernarg.segment.ptr to i32 addrspace(4)*
+ %gep = getelementptr i32, i32 addrspace(4)* %header.ptr, i64 10
+ %value = load i32, i32 addrspace(4)* %gep
+ store volatile i32 %value, i32 addrspace(1)* undef
+ ret void
+}
+
+; GCN-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs:
+; HSA: kernarg_segment_byte_size = 48
+; OS-MESA3d: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs() #2 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load volatile i32, i32 addrspace(4)* %arg.ptr
+ store volatile i32 %val, i32 addrspace(1)* null
+ ret void
+}
+
+; GCN-LABEL: {{^}}opencl_test_implicit_alignment_no_explicit_kernargs_round_up:
+; HSA: kernarg_segment_byte_size = 40
+; OS-MESA3D: kernarg_segment_byte_size = 16
+; CO-V2: kernarg_segment_alignment = 4
+define amdgpu_kernel void @opencl_test_implicit_alignment_no_explicit_kernargs_round_up() #3 {
+ %implicitarg.ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr()
+ %arg.ptr = bitcast i8 addrspace(4)* %implicitarg.ptr to i32 addrspace(4)*
+ %val = load volatile i32, i32 addrspace(4)* %arg.ptr
+ store volatile i32 %val, i32 addrspace(1)* null
+ ret void
+}
+
+declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
+declare i8 addrspace(4)* @llvm.amdgcn.implicitarg.ptr() #0
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind "amdgpu-implicitarg-num-bytes"="48" }
+attributes #3 = { nounwind "amdgpu-implicitarg-num-bytes"="38" }
More information about the llvm-commits
mailing list