[llvm-branch-commits] [llvm] f431123 - resolved issues with ret v2i8
Jeffrey Byrnes via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Oct 17 10:42:34 PDT 2022
Author: Jeffrey Byrnes
Date: 2022-10-13T14:13:27-07:00
New Revision: f431123ac5be268c4707d7f16878039c6051e71c
URL: https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c
DIFF: https://github.com/llvm/llvm-project/commit/f431123ac5be268c4707d7f16878039c6051e71c.diff
LOG: resolved issues with ret v2i8
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 528ee108408f..081d8d96c9e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -958,6 +958,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
/// Return a legal replacement for the given operation, with all legal operands.
void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
+ errs() << "\n\n\nDAG BEFORE\n";
+ DAG.dump();
+ errs() << "\n";
+
LLVM_DEBUG(dbgs() << "\nLegalizing: "; Node->dump(&DAG));
// Allow illegal target nodes and illegal registers.
@@ -1310,10 +1314,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
return;
case TargetLowering::Custom:
+ errs() << "from legalizeDAG.cpp\n";
LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
// FIXME: The handling for custom lowering with multiple results is
// a complete mess.
if (SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG)) {
+ errs() << "TLI.LowerOperation returned\n";
if (!(Res.getNode() != Node || Res.getResNo() != 0))
return;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 95cd5371814e..5573acb5f6e5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1131,6 +1131,7 @@ SDValue SelectionDAGBuilder::getControlRoot() {
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (I.isTerminator()) {
+ errs() << "Is terminator\n";
HandlePHINodesInSuccessorBlocks(I.getParent());
}
@@ -1149,6 +1150,7 @@ void SelectionDAGBuilder::visit(const Instruction &I) {
DAG, [&](SDNode *) { NodeInserted = true; });
}
+ errs() << "calling visit with opcode " << I.getOpcodeName() << "\n";
visit(I.getOpcode(), I);
if (!I.isTerminator() && !HasTailCall &&
@@ -1936,6 +1938,7 @@ void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
}
void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
+ errs() << "in visitRet\n";
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
auto &DL = DAG.getDataLayout();
SDValue Chain = getControlRoot();
@@ -1955,6 +1958,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
}
if (!FuncInfo.CanLowerReturn) {
+ errs() << "!CanLowerReturn\n";
unsigned DemoteReg = FuncInfo.DemoteRegister;
const Function *F = I.getParent()->getParent();
@@ -1998,9 +2002,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
+ errs() << "CanReturn && NumOpers !=0\n";
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
unsigned NumValues = ValueVTs.size();
+ errs() << "NumValues: " << NumValues << "\n";
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
@@ -2027,9 +2033,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
CallingConv::ID CC = F->getCallingConv();
+ errs() << "calling getNumRegs for CallConv\n";
unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
SmallVector<SDValue, 4> Parts(NumParts);
+ errs() << "Calling getCopyToParts with NumParts: " << NumParts << "\n";
getCopyToParts(DAG, getCurSDLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
@@ -2067,6 +2075,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
}
}
+ errs() << "Made it passed end of condition\n";
+
// Push in swifterror virtual register as the last element of Outs. This makes
// sure swifterror virtual register will be returned in the swifterror
// physical register.
@@ -2086,6 +2096,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
EVT(TLI.getPointerTy(DL))));
}
+ errs() << "doing calling conv stuff\n";
bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
CallingConv::ID CallConv =
DAG.getMachineFunction().getFunction().getCallingConv();
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 52b8eee7bcc7..35b099e8f1f0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -371,6 +371,7 @@ static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ errs() << "SelectionDAGIsel ROMF\n";
// If we already selected that function, we do not need to run SDISel.
if (mf.getProperties().hasProperty(
MachineFunctionProperties::Property::Selected))
@@ -673,8 +674,12 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Lower the instructions. If a call is emitted as a tail call, cease emitting
// nodes for this block.
for (BasicBlock::const_iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
- if (!ElidedArgCopyInstrs.count(&*I))
+ if (!ElidedArgCopyInstrs.count(&*I)) {
+ errs() << "SelectBB :SDB visit:";
+ I->dump();
+ errs() << "\n";
SDB->visit(*I);
+ }
}
// Make sure the root of the DAG is up-to-date.
@@ -1342,6 +1347,7 @@ static void processDbgDeclares(FunctionLoweringInfo &FuncInfo) {
}
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
+ errs() << "Select ALL basic blocks\n";
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = nullptr;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 523788106db6..86683544a4c9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -22,14 +22,14 @@ def CC_SI_Gfx : CallingConv<[
// 32 is reserved for the stack pointer
// 33 is reserved for the frame pointer
// 34 is reserved for the base pointer
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[
SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29
]>>>,
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -43,7 +43,7 @@ def RetCC_SI_Gfx : CallingConv<[
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -66,7 +66,7 @@ def RetCC_SI_Gfx : CallingConv<[
def CC_SI_SHADER : CallingConv<[
- CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[
+ CCIfInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -76,7 +76,7 @@ def CC_SI_SHADER : CallingConv<[
]>>>,
// 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
- CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8] , CCAssignToReg<[
+ CCIfNotInReg<CCIfType<[f32, i32, f16, i16, v2i16, v2f16, v4i8, v2i8] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
@@ -99,7 +99,7 @@ def CC_SI_SHADER : CallingConv<[
def RetCC_SI_Shader : CallingConv<[
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
- CCIfType<[i32, i16, v4i8] , CCAssignToReg<[
+ CCIfType<[i32, i16, v4i8, v2i8] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
@@ -183,19 +183,19 @@ def CC_AMDGPU_Func : CallingConv<[
CCIfByVal<CCPassByVal<4, 4>>,
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i8, i16], CCIfExtend<CCPromoteToType<i32>>>,
- CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, i1], CCAssignToReg<[
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, v2i8, i1], CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31]>>,
- CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, i1], CCAssignToStack<4, 4>>
+ CCIfType<[i32, f32, v2i16, v2f16, i16, f16, v4i8, v2i8, i1], CCAssignToStack<4, 4>>
]>;
// Calling convention for leaf functions
def RetCC_AMDGPU_Func : CallingConv<[
CCIfType<[i1], CCPromoteToType<i32>>,
CCIfType<[i1, i16], CCIfExtend<CCPromoteToType<i32>>>,
- CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8], CCAssignToReg<[
+ CCIfType<[i32, f32, i16, f16, v2i16, v2f16, v4i8, v2i8], CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 303cbe5657bc..62ac1fcd95ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -124,6 +124,7 @@ AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(
}
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ errs() << "amdgpuisel ROMF\n";
#ifdef EXPENSIVE_CHECKS
DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 9980e851f982..d814733580b6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -1141,6 +1141,7 @@ SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
SelectionDAG &DAG) const {
+ errs() << "AMDGPUTL::LowerOp\n";
switch (Op.getOpcode()) {
default:
Op->print(errs(), &DAG);
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index f92bde72867a..f9638eda346d 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -85,6 +85,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v4i8, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v2i8, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::i8, &AMDGPU::SReg_32RegClass);
addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
@@ -651,7 +652,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECTOR_SHUFFLE,
{MVT::v4f16, MVT::v4i16, MVT::v8f16, MVT::v8i16,
- MVT::v16f16, MVT::v16i16, MVT::v4i8},
+ MVT::v16f16, MVT::v16i16, MVT::v4i8, MVT::v2i8},
Custom);
for (MVT VT : {MVT::v4i16, MVT::v8i16, MVT::v16i16})
@@ -851,7 +852,7 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
EVT VT) const {
if (CC == CallingConv::AMDGPU_KERNEL)
return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
-
+
if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
EVT ScalarVT = VT.getScalarType();
@@ -859,7 +860,7 @@ unsigned SITargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
// FIXME: Should probably promote 8-bit vectors to i16.
if (Size == 8 && Subtarget->has16BitInsts())
- return (NumElts + 1) / 4;
+ return (NumElts + 2) / 4;
if (Size == 16 && Subtarget->has16BitInsts())
return (NumElts + 1) / 2;
@@ -4670,6 +4671,10 @@ SDValue SITargetLowering::splitTernaryVectorOp(SDValue Op,
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ errs() << "SITL::LowerOp on op\n";
+ Op.dump();
+ errs() << "\n";
+
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
@@ -4703,6 +4708,7 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INSERT_VECTOR_ELT:
return lowerINSERT_VECTOR_ELT(Op, DAG);
case ISD::EXTRACT_VECTOR_ELT:
+ errs() << "calling lowerEVE\n";
return lowerEXTRACT_VECTOR_ELT(Op, DAG);
case ISD::VECTOR_SHUFFLE:
return lowerVECTOR_SHUFFLE(Op, DAG);
@@ -5779,6 +5785,8 @@ SDValue SITargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
SDLoc SL(Op);
+ errs() << "in lowerEVE\n";
+
EVT ResultVT = Op.getValueType();
SDValue Vec = Op.getOperand(0);
@@ -5879,6 +5887,7 @@ static bool elementPairIsContiguous(ArrayRef<int> Mask, int Elt) {
SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
SelectionDAG &DAG) const {
+ errs() << "in SIISelLowering lowerVECTOR_SHUFFLE\n";
SDLoc SL(Op);
EVT ResultVT = Op.getValueType();
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
@@ -5887,6 +5896,7 @@ SDValue SITargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
EVT EltVT;
auto ScalarSize = ResultVT.getVectorElementType().getSizeInBits() ;
if (ScalarSize == 8) {
+ // PackVT is
PackVT = MVT::v2i8;
}
else {
@@ -5958,6 +5968,8 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
SDLoc SL(Op);
EVT VT = Op.getValueType();
+ errs() << "in lowerBuild_Vector with VT: " << VT.getEVTString() << "\n";
+
if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
VT == MVT::v8i16 || VT == MVT::v8f16) {
EVT HalfVT = MVT::getVectorVT(VT.getVectorElementType().getSimpleVT(),
@@ -6013,7 +6025,8 @@ SDValue SITargetLowering::lowerBUILD_VECTOR(SDValue Op,
EVT BCVT = (SrcVT) == MVT::f16 ? MVT::i16 : SrcVT;
unsigned VecSize = VT.getSizeInBits(); // 16, 32
- EVT EltVT = SrcVT.getVectorElementType();
+ EVT EltVT = SrcVT.isVector() ? SrcVT.getVectorElementType() : SrcVT.getScalarType();
+
unsigned EltSize = EltVT.getSizeInBits();
SDValue ScaleFactor = DAG.getConstant(Log2_32(EltSize), SL, MVT::i32);
More information about the llvm-branch-commits mailing list