[llvm] r366584 - AMDGPU/GlobalISel: Support arguments with multiple registers
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 19 07:29:30 PDT 2019
Author: arsenm
Date: Fri Jul 19 07:29:30 2019
New Revision: 366584
URL: http://llvm.org/viewvc/llvm-project?rev=366584&view=rev
Log:
AMDGPU/GlobalISel: Support arguments with multiple registers
Handles structs used directly in argument lists.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp?rev=366584&r1=366583&r2=366584&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.cpp Fri Jul 19 07:29:30 2019
@@ -152,33 +152,45 @@ void AMDGPUCallLowering::splitToValueTyp
SmallVector<EVT, 4> SplitVTs;
ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs);
- EVT VT = SplitVTs[0];
- unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ assert(OrigArg.Regs.size() == SplitVTs.size());
- if (NumParts == 1) {
- // No splitting to do, but we want to replace the original type (e.g. [1 x
- // double] -> double).
- SplitArgs.emplace_back(OrigArg.Regs[0], VT.getTypeForEVT(Ctx),
- OrigArg.Flags, OrigArg.IsFixed);
- return;
- }
+ int SplitIdx = 0;
+ for (EVT VT : SplitVTs) {
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ Type *Ty = VT.getTypeForEVT(Ctx);
- LLT LLTy = getLLTForType(*OrigArg.Ty, DL);
- SmallVector<Register, 8> SplitRegs;
- EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
- Type *PartTy = PartVT.getTypeForEVT(Ctx);
- LLT PartLLT = getLLTForType(*PartTy, DL);
-
- // FIXME: Should we be reporting all of the part registers for a single
- // argument, and let handleAssignments take care of the repacking?
- for (unsigned i = 0; i < NumParts; ++i) {
- Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
- SplitRegs.push_back(PartReg);
- SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
- }
- PerformArgSplit(SplitRegs, LLTy, PartLLT);
+ if (NumParts == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
+ OrigArg.Flags, OrigArg.IsFixed);
+
+ ++SplitIdx;
+ continue;
+ }
+
+ LLT LLTy = getLLTForType(*Ty, DL);
+
+ SmallVector<Register, 8> SplitRegs;
+
+ EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
+ Type *PartTy = PartVT.getTypeForEVT(Ctx);
+ LLT PartLLT = getLLTForType(*PartTy, DL);
+
+ // FIXME: Should we be reporting all of the part registers for a single
+ // argument, and let handleAssignments take care of the repacking?
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register PartReg = MRI.createGenericVirtualRegister(PartLLT);
+ SplitRegs.push_back(PartReg);
+ SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
+ }
+
+ PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
+
+ ++SplitIdx;
+ }
}
bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
@@ -485,11 +497,11 @@ bool AMDGPUCallLowering::lowerFormalArgu
if (!IsShader && InReg)
return false;
- // TODO: Handle multiple registers and sret.
+ // TODO: Handle sret.
if (Arg.hasAttribute(Attribute::StructRet) ||
Arg.hasAttribute(Attribute::SwiftSelf) ||
Arg.hasAttribute(Attribute::SwiftError) ||
- Arg.hasAttribute(Attribute::Nest) || VRegs[Idx].size() > 1)
+ Arg.hasAttribute(Attribute::Nest))
return false;
if (CC == CallingConv::AMDGPU_PS && !InReg && PSInputNum <= 15) {
@@ -505,7 +517,9 @@ bool AMDGPUCallLowering::lowerFormalArgu
++PSInputNum;
if (SkipArg) {
- MIRBuilder.buildUndef(VRegs[Idx][0]);
+ for (int I = 0, E = VRegs[Idx].size(); I != E; ++I)
+ MIRBuilder.buildUndef(VRegs[Idx][I]);
+
++Idx;
continue;
}
@@ -513,11 +527,14 @@ bool AMDGPUCallLowering::lowerFormalArgu
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
- splitToValueTypes(OrigArg, SplitArgs, DL, MRI, CC,
+
+ splitToValueTypes(
+ OrigArg, SplitArgs, DL, MRI, CC,
// FIXME: We should probably be passing multiple registers to
// handleAssignments to do this
- [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT) {
- packSplitRegsToOrigType(MIRBuilder, VRegs[Idx], Regs, LLTy, PartLLT);
+ [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ packSplitRegsToOrigType(MIRBuilder, VRegs[Idx][VTSplitIdx], Regs,
+ LLTy, PartLLT);
});
++Idx;
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h?rev=366584&r1=366583&r2=366584&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUCallLowering.h Fri Jul 19 07:29:30 2019
@@ -30,7 +30,7 @@ class AMDGPUCallLowering: public CallLow
Register DstReg) const;
/// A function of this type is used to perform value split action.
- using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT)>;
+ using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>;
void splitToValueTypes(const ArgInfo &OrigArgInfo,
SmallVectorImpl<ArgInfo> &SplitArgs,
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll?rev=366584&r1=366583&r2=366584&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-amdgpu_ps.ll Fri Jul 19 07:29:30 2019
@@ -1,10 +1,37 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -stop-after=irtranslator -global-isel %s -o - | FileCheck %s
; Check that we correctly skip over disabled inputs
-; CHECK: [[S0:%[0-9]+]]:_(s32) = COPY $sgpr2
-; CHECK: [[V0:%[0-9]+]]:_(s32) = COPY $vgpr0
-; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), %{{[0-9]+}}(s32), %{{[0-9]+}}(s32), [[S0]](s32), [[S0]](s32), [[S0]](s32), [[V0]](s32)
-define amdgpu_ps void @ps0(float inreg %arg0, float %psinput0, float %psinput1) #1 {
+define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
+ ; CHECK-LABEL: name: disabled_input
+ ; CHECK: bb.1.main_body:
+ ; CHECK: liveins: $sgpr2, $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
+ ; CHECK: S_ENDPGM 0
+main_body:
+ call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
+ ret void
+}
+
+define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) #1 {
+ ; CHECK-LABEL: name: disabled_input_struct
+ ; CHECK: bb.1.main_body:
+ ; CHECK: liveins: $sgpr2, $vgpr0
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 15
+ ; CHECK: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
+ ; CHECK: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), [[C]](s32), [[C1]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), [[C2]](s1), [[C2]](s1)
+ ; CHECK: S_ENDPGM 0
main_body:
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
ret void
Modified: llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll?rev=366584&r1=366583&r2=366584&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/GlobalISel/irtranslator-function-args.ll Fri Jul 19 07:29:30 2019
@@ -1,10 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -global-isel-abort=2 -pass-remarks-missed='gisel*' -verify-machineinstrs -o - %s 2> %t | FileCheck %s
-; RUN: FileCheck -check-prefix=ERR %s < %t
-
-; ERR-NOT: remark
-; ERR: remark: <unknown>:0:0: unable to lower arguments: void ({ i8, i32 })* (in function: void_func_struct_i8_i32)
-; ERR-NOT: remark
+; RUN: llc -march=amdgcn -mcpu=tahiti -O0 -stop-after=irtranslator -global-isel -verify-machineinstrs -o - %s | FileCheck %s
define void @void_func_i1(i1 %arg0) #0 {
; CHECK-LABEL: name: void_func_i1
@@ -1138,9 +1133,17 @@ define void @void_func_struct_i32({ i32
define void @void_func_struct_i8_i32({ i8, i32 } %arg0) #0 {
; CHECK-LABEL: name: void_func_struct_i8_i32
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: G_STORE [[TRUNC]](s8), [[DEF]](p1) :: (store 1 into `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
+ ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK: [[GEP:%[0-9]+]]:_(p1) = G_GEP [[DEF]], [[C]](s64)
+ ; CHECK: G_STORE [[COPY1]](s32), [[GEP]](p1) :: (store 4 into `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+ ; CHECK: S_ENDPGM 0
store { i8, i32 } %arg0, { i8, i32 } addrspace(1)* undef
ret void
}
More information about the llvm-commits mailing list