[llvm] eb41627 - AMDGPU/GlobalISel: Improve handling of illegal return types
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 9 13:11:43 PDT 2020
Author: Matt Arsenault
Date: 2020-03-09T13:11:30-07:00
New Revision: eb41627799b30667fe7fe06d485d5501c8923f50
URL: https://github.com/llvm/llvm-project/commit/eb41627799b30667fe7fe06d485d5501c8923f50
DIFF: https://github.com/llvm/llvm-project/commit/eb41627799b30667fe7fe06d485d5501c8923f50.diff
LOG: AMDGPU/GlobalISel: Improve handling of illegal return types
Most importantly, this fixes ret i8. Also make sure to handle
signext/zeroext for odd types wider than i32. Some of the
corresponding argument-passing fixes still need to be handled.
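As a quick illustration of what changes, consider returning an odd-width
scalar with an extension attribute; this is one of the cases added to the
tests below:

    define signext i48 @i48_signext_func_void() {
      %val = load i48, i48 addrspace(1)* undef, align 8
      ret i48 %val
    }

The IRTranslator now emits a G_SEXT of the s48 load to s64 followed by a
G_UNMERGE_VALUES into two 32-bit pieces for $vgpr0/$vgpr1, instead of the
old G_INSERT/G_EXTRACT sequence.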
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index ec0f38afc48b..f32f9ec0e6dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -153,10 +153,26 @@ AMDGPUCallLowering::AMDGPUCallLowering(const AMDGPUTargetLowering &TLI)
: CallLowering(&TLI) {
}
+// FIXME: Compatibility shim
+static ISD::NodeType extOpcodeToISDExtOpcode(unsigned MIOpc) {
+ switch (MIOpc) {
+ case TargetOpcode::G_SEXT:
+ return ISD::SIGN_EXTEND;
+ case TargetOpcode::G_ZEXT:
+ return ISD::ZERO_EXTEND;
+ case TargetOpcode::G_ANYEXT:
+ return ISD::ANY_EXTEND;
+ default:
+ llvm_unreachable("not an extend opcode");
+ }
+}
+
void AMDGPUCallLowering::splitToValueTypes(
- const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv,
- SplitArgTy PerformArgSplit) const {
+ MachineIRBuilder &B,
+ const ArgInfo &OrigArg, unsigned OrigArgIdx,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, CallingConv::ID CallConv,
+ SplitArgTy PerformArgSplit) const {
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
LLVMContext &Ctx = OrigArg.Ty->getContext();
@@ -170,28 +186,46 @@ void AMDGPUCallLowering::splitToValueTypes(
int SplitIdx = 0;
for (EVT VT : SplitVTs) {
- unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ Register Reg = OrigArg.Regs[SplitIdx];
Type *Ty = VT.getTypeForEVT(Ctx);
+ LLT LLTy = getLLTForType(*Ty, DL);
+ if (OrigArgIdx == AttributeList::ReturnIndex && VT.isScalarInteger()) {
+ unsigned ExtendOp = TargetOpcode::G_ANYEXT;
+ if (OrigArg.Flags[0].isSExt()) {
+ assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
+ ExtendOp = TargetOpcode::G_SEXT;
+ } else if (OrigArg.Flags[0].isZExt()) {
+ assert(OrigArg.Regs.size() == 1 && "expect only simple return values");
+ ExtendOp = TargetOpcode::G_ZEXT;
+ }
+ EVT ExtVT = TLI.getTypeForExtReturn(Ctx, VT,
+ extOpcodeToISDExtOpcode(ExtendOp));
+ if (ExtVT != VT) {
+ VT = ExtVT;
+ Ty = ExtVT.getTypeForEVT(Ctx);
+ LLTy = getLLTForType(*Ty, DL);
+ Reg = B.buildInstr(ExtendOp, {LLTy}, {Reg}).getReg(0);
+ }
+ }
+
+ unsigned NumParts = TLI.getNumRegistersForCallingConv(Ctx, CallConv, VT);
+ MVT RegVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
if (NumParts == 1) {
// No splitting to do, but we want to replace the original type (e.g. [1 x
// double] -> double).
- SplitArgs.emplace_back(OrigArg.Regs[SplitIdx], Ty,
- OrigArg.Flags, OrigArg.IsFixed);
+ SplitArgs.emplace_back(Reg, Ty, OrigArg.Flags, OrigArg.IsFixed);
++SplitIdx;
continue;
}
- LLT LLTy = getLLTForType(*Ty, DL);
-
SmallVector<Register, 8> SplitRegs;
-
- EVT PartVT = TLI.getRegisterTypeForCallingConv(Ctx, CallConv, VT);
- Type *PartTy = PartVT.getTypeForEVT(Ctx);
+ Type *PartTy = EVT(RegVT).getTypeForEVT(Ctx);
LLT PartLLT = getLLTForType(*PartTy, DL);
+ MachineRegisterInfo &MRI = *B.getMRI();
// FIXME: Should we be reporting all of the part registers for a single
// argument, and let handleAssignments take care of the repacking?
@@ -201,7 +235,7 @@ void AMDGPUCallLowering::splitToValueTypes(
SplitArgs.emplace_back(ArrayRef<Register>(PartReg), PartTy, OrigArg.Flags);
}
- PerformArgSplit(SplitRegs, LLTy, PartLLT, SplitIdx);
+ PerformArgSplit(SplitRegs, Reg, LLTy, PartLLT, SplitIdx);
++SplitIdx;
}
@@ -221,6 +255,7 @@ static LLT getMultipleType(LLT OrigTy, int Factor) {
static void unpackRegsToOrigType(MachineIRBuilder &B,
ArrayRef<Register> DstRegs,
Register SrcReg,
+ const CallLowering::ArgInfo &Info,
LLT SrcTy,
LLT PartTy) {
assert(DstRegs.size() > 1 && "Nothing to unpack");
@@ -266,24 +301,26 @@ bool AMDGPUCallLowering::lowerReturnVal(MachineIRBuilder &B,
auto &MF = B.getMF();
const auto &F = MF.getFunction();
const DataLayout &DL = MF.getDataLayout();
+ MachineRegisterInfo *MRI = B.getMRI();
CallingConv::ID CC = F.getCallingConv();
const SITargetLowering &TLI = *getTLI<SITargetLowering>();
- MachineRegisterInfo &MRI = MF.getRegInfo();
ArgInfo OrigRetInfo(VRegs, Val->getType());
setArgFlags(OrigRetInfo, AttributeList::ReturnIndex, DL, F);
SmallVector<ArgInfo, 4> SplitRetInfos;
splitToValueTypes(
- OrigRetInfo, SplitRetInfos, DL, MRI, CC,
- [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
- unpackRegsToOrigType(B, Regs, VRegs[VTSplitIdx], LLTy, PartLLT);
+ B, OrigRetInfo, AttributeList::ReturnIndex, SplitRetInfos, DL, CC,
+ [&](ArrayRef<Register> Regs, Register SrcReg, LLT LLTy, LLT PartLLT,
+ int VTSplitIdx) {
+ unpackRegsToOrigType(B, Regs, SrcReg,
+ SplitRetInfos[VTSplitIdx],
+ LLTy, PartLLT);
});
CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(CC, F.isVarArg());
-
- OutgoingValueHandler RetHandler(B, MF.getRegInfo(), Ret, AssignFn);
+ OutgoingValueHandler RetHandler(B, *MRI, Ret, AssignFn);
return handleAssignments(B, SplitRetInfos, RetHandler);
}
@@ -308,7 +345,7 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &B,
return true;
}
- auto const &ST = B.getMF().getSubtarget<GCNSubtarget>();
+ auto const &ST = MF.getSubtarget<GCNSubtarget>();
unsigned ReturnOpc =
IsShader ? AMDGPU::SI_RETURN_TO_EPILOG : AMDGPU::S_SETPC_B64_return;
@@ -663,13 +700,16 @@ bool AMDGPUCallLowering::lowerFormalArguments(
}
ArgInfo OrigArg(VRegs[Idx], Arg.getType());
- setArgFlags(OrigArg, Idx + AttributeList::FirstArgIndex, DL, F);
+ const unsigned OrigArgIdx = Idx + AttributeList::FirstArgIndex;
+ setArgFlags(OrigArg, OrigArgIdx, DL, F);
splitToValueTypes(
- OrigArg, SplitArgs, DL, MRI, CC,
+ B, OrigArg, OrigArgIdx, SplitArgs, DL, CC,
// FIXME: We should probably be passing multiple registers to
// handleAssignments to do this
- [&](ArrayRef<Register> Regs, LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ [&](ArrayRef<Register> Regs, Register DstReg,
+ LLT LLTy, LLT PartLLT, int VTSplitIdx) {
+ assert(DstReg == VRegs[Idx][VTSplitIdx]);
packSplitRegsToOrigType(B, VRegs[Idx][VTSplitIdx], Regs,
LLTy, PartLLT);
});
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 53a562586bc0..3651dd40bc9f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -30,11 +30,13 @@ class AMDGPUCallLowering: public CallLowering {
unsigned Align, Register DstReg) const;
/// A function of this type is used to perform value split action.
- using SplitArgTy = std::function<void(ArrayRef<Register>, LLT, LLT, int)>;
+ using SplitArgTy = std::function<void(ArrayRef<Register>, Register, LLT, LLT, int)>;
- void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ void splitToValueTypes(MachineIRBuilder &B,
+ const ArgInfo &OrigArgInfo,
+ unsigned OrigArgIdx,
SmallVectorImpl<ArgInfo> &SplitArgs,
- const DataLayout &DL, MachineRegisterInfo &MRI,
+ const DataLayout &DL,
CallingConv::ID CallConv,
SplitArgTy SplitArg) const;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
index 008b3c491260..82ecb616aa11 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/function-returns.ll
@@ -46,41 +46,92 @@ define signext i1 @i1_signext_func_void() #0 {
ret i1 %val
}
+define i7 @i7_func_void() #0 {
+ ; CHECK-LABEL: name: i7_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s7)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ %val = load i7, i7 addrspace(1)* undef
+ ret i7 %val
+}
+
+define zeroext i7 @i7_zeroext_func_void() #0 {
+ ; CHECK-LABEL: name: i7_zeroext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s7)
+ ; CHECK: $vgpr0 = COPY [[ZEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ %val = load i7, i7 addrspace(1)* undef
+ ret i7 %val
+}
+
+define signext i7 @i7_signext_func_void() #0 {
+ ; CHECK-LABEL: name: i7_signext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s7) = G_LOAD [[DEF]](p1) :: (load 1 from `i7 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s7)
+ ; CHECK: $vgpr0 = COPY [[SEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
+ %val = load i7, i7 addrspace(1)* undef
+ ret i7 %val
+}
+
define i8 @i8_func_void() #0 {
; CHECK-LABEL: name: i8_func_void
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: bb.1 (%ir-block.0):
; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
%val = load i8, i8 addrspace(1)* undef
ret i8 %val
}
define zeroext i8 @i8_zeroext_func_void() #0 {
; CHECK-LABEL: name: i8_zeroext_func_void
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: bb.1 (%ir-block.0):
; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD]](s8)
+ ; CHECK: $vgpr0 = COPY [[ZEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
%val = load i8, i8 addrspace(1)* undef
ret i8 %val
}
define signext i8 @i8_signext_func_void() #0 {
; CHECK-LABEL: name: i8_signext_func_void
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: bb.1 (%ir-block.0):
; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `i8 addrspace(1)* undef`, addrspace 1)
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD]](s8)
+ ; CHECK: $vgpr0 = COPY [[SEXT]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0
%val = load i8, i8 addrspace(1)* undef
ret i8 %val
}
@@ -151,12 +202,44 @@ define i48 @i48_func_void() #0 {
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
- ; CHECK: [[INSERT:%[0-9]+]]:_(s64) = G_INSERT [[DEF1]], [[LOAD]](s48), 0
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s64), 32
- ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
- ; CHECK: $vgpr1 = COPY [[EXTRACT1]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[LOAD]](s48)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s64)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ %val = load i48, i48 addrspace(1)* undef, align 8
+ ret i48 %val
+}
+
+define signext i48 @i48_signext_func_void() #0 {
+ ; CHECK-LABEL: name: i48_signext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD]](s48)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s64)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
+ %val = load i48, i48 addrspace(1)* undef, align 8
+ ret i48 %val
+}
+
+define zeroext i48 @i48_zeroext_func_void() #0 {
+ ; CHECK-LABEL: name: i48_zeroext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s48) = G_LOAD [[DEF]](p1) :: (load 6 from `i48 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD]](s48)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s64)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
%val = load i48, i48 addrspace(1)* undef, align 8
@@ -186,14 +269,47 @@ define i65 @i65_func_void() #0 {
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1)
- ; CHECK: [[DEF1:%[0-9]+]]:_(s96) = G_IMPLICIT_DEF
- ; CHECK: [[INSERT:%[0-9]+]]:_(s96) = G_INSERT [[DEF1]], [[LOAD]](s65), 0
- ; CHECK: [[EXTRACT:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 0
- ; CHECK: [[EXTRACT1:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 32
- ; CHECK: [[EXTRACT2:%[0-9]+]]:_(s32) = G_EXTRACT [[INSERT]](s96), 64
- ; CHECK: $vgpr0 = COPY [[EXTRACT]](s32)
- ; CHECK: $vgpr1 = COPY [[EXTRACT1]](s32)
- ; CHECK: $vgpr2 = COPY [[EXTRACT2]](s32)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s96) = G_ANYEXT [[LOAD]](s65)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s96)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ %val = load i65, i65 addrspace(1)* undef
+ ret i65 %val
+}
+
+define signext i65 @i65_signext_func_void() #0 {
+ ; CHECK-LABEL: name: i65_signext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s96) = G_SEXT [[LOAD]](s65)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s96)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ %val = load i65, i65 addrspace(1)* undef
+ ret i65 %val
+}
+
+define zeroext i65 @i65_zeroext_func_void() #0 {
+ ; CHECK-LABEL: name: i65_zeroext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s65) = G_LOAD [[DEF]](p1) :: (load 9 from `i65 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s96) = G_ZEXT [[LOAD]](s65)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s96)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr2 = COPY [[UV2]](s32)
; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%val = load i65, i65 addrspace(1)* undef
@@ -854,16 +970,19 @@ define <4 x i8> @v4i8_func_void() #0 {
define {i8, i32} @struct_i8_i32_func_void() #0 {
; CHECK-LABEL: name: struct_i8_i32_func_void
- ; CHECK: bb.0:
- ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $sgpr30_sgpr31
; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
- ; CHECK: bb.1 (%ir-block.0):
; CHECK: [[LOAD:%[0-9]+]]:_(s8) = G_LOAD [[DEF]](p1) :: (load 1 from `{ i8, i32 } addrspace(1)* undef`, align 4, addrspace 1)
; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C]](s64)
; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load 4 from `{ i8, i32 } addrspace(1)* undef` + 4, addrspace 1)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LOAD]](s8)
+ ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; CHECK: $vgpr1 = COPY [[LOAD1]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1
%val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
ret { i8, i32 } %val
}
@@ -1060,4 +1179,145 @@ define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0
ret void
}
+define i1022 @i1022_func_void() #0 {
+ ; CHECK-LABEL: name: i1022_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s1024) = G_ANYEXT [[LOAD]](s1022)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ANYEXT]](s1024)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK: $vgpr30 = COPY [[UV30]](s32)
+ ; CHECK: $vgpr31 = COPY [[UV31]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+ %val = load i1022, i1022 addrspace(1)* undef
+ ret i1022 %val
+}
+
+define signext i1022 @i1022_signext_func_void() #0 {
+ ; CHECK-LABEL: name: i1022_signext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s1024) = G_SEXT [[LOAD]](s1022)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[SEXT]](s1024)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK: $vgpr30 = COPY [[UV30]](s32)
+ ; CHECK: $vgpr31 = COPY [[UV31]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+ %val = load i1022, i1022 addrspace(1)* undef
+ ret i1022 %val
+}
+
+define zeroext i1022 @i1022_zeroext_func_void() #0 {
+ ; CHECK-LABEL: name: i1022_zeroext_func_void
+ ; CHECK: bb.1 (%ir-block.0):
+ ; CHECK: liveins: $sgpr30_sgpr31
+ ; CHECK: [[COPY:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
+ ; CHECK: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF
+ ; CHECK: [[LOAD:%[0-9]+]]:_(s1022) = G_LOAD [[DEF]](p1) :: (load 128 from `i1022 addrspace(1)* undef`, align 8, addrspace 1)
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s1024) = G_ZEXT [[LOAD]](s1022)
+ ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[ZEXT]](s1024)
+ ; CHECK: $vgpr0 = COPY [[UV]](s32)
+ ; CHECK: $vgpr1 = COPY [[UV1]](s32)
+ ; CHECK: $vgpr2 = COPY [[UV2]](s32)
+ ; CHECK: $vgpr3 = COPY [[UV3]](s32)
+ ; CHECK: $vgpr4 = COPY [[UV4]](s32)
+ ; CHECK: $vgpr5 = COPY [[UV5]](s32)
+ ; CHECK: $vgpr6 = COPY [[UV6]](s32)
+ ; CHECK: $vgpr7 = COPY [[UV7]](s32)
+ ; CHECK: $vgpr8 = COPY [[UV8]](s32)
+ ; CHECK: $vgpr9 = COPY [[UV9]](s32)
+ ; CHECK: $vgpr10 = COPY [[UV10]](s32)
+ ; CHECK: $vgpr11 = COPY [[UV11]](s32)
+ ; CHECK: $vgpr12 = COPY [[UV12]](s32)
+ ; CHECK: $vgpr13 = COPY [[UV13]](s32)
+ ; CHECK: $vgpr14 = COPY [[UV14]](s32)
+ ; CHECK: $vgpr15 = COPY [[UV15]](s32)
+ ; CHECK: $vgpr16 = COPY [[UV16]](s32)
+ ; CHECK: $vgpr17 = COPY [[UV17]](s32)
+ ; CHECK: $vgpr18 = COPY [[UV18]](s32)
+ ; CHECK: $vgpr19 = COPY [[UV19]](s32)
+ ; CHECK: $vgpr20 = COPY [[UV20]](s32)
+ ; CHECK: $vgpr21 = COPY [[UV21]](s32)
+ ; CHECK: $vgpr22 = COPY [[UV22]](s32)
+ ; CHECK: $vgpr23 = COPY [[UV23]](s32)
+ ; CHECK: $vgpr24 = COPY [[UV24]](s32)
+ ; CHECK: $vgpr25 = COPY [[UV25]](s32)
+ ; CHECK: $vgpr26 = COPY [[UV26]](s32)
+ ; CHECK: $vgpr27 = COPY [[UV27]](s32)
+ ; CHECK: $vgpr28 = COPY [[UV28]](s32)
+ ; CHECK: $vgpr29 = COPY [[UV29]](s32)
+ ; CHECK: $vgpr30 = COPY [[UV30]](s32)
+ ; CHECK: $vgpr31 = COPY [[UV31]](s32)
+ ; CHECK: [[COPY1:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY]]
+ ; CHECK: S_SETPC_B64_return [[COPY1]], implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7, implicit $vgpr8, implicit $vgpr9, implicit $vgpr10, implicit $vgpr11, implicit $vgpr12, implicit $vgpr13, implicit $vgpr14, implicit $vgpr15, implicit $vgpr16, implicit $vgpr17, implicit $vgpr18, implicit $vgpr19, implicit $vgpr20, implicit $vgpr21, implicit $vgpr22, implicit $vgpr23, implicit $vgpr24, implicit $vgpr25, implicit $vgpr26, implicit $vgpr27, implicit $vgpr28, implicit $vgpr29, implicit $vgpr30, implicit $vgpr31
+ %val = load i1022, i1022 addrspace(1)* undef
+ ret i1022 %val
+}
+
attributes #0 = { nounwind }
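For the simplest case called out in the log message, a plain i8 return now
also survives the IRTranslator. A minimal sample in the style of the tests
above (the function name here is illustrative, not from the patch):

    define i8 @ret_i8_sample() {
      %val = load i8, i8 addrspace(1)* undef
      ret i8 %val
    }

This gets a G_ANYEXT of the s8 value to s32 before the copy into $vgpr0, as
shown in the i8_func_void checks.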