[PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel
Tom Stellard
tom at stellard.net
Wed Sep 11 07:22:53 PDT 2013
On Sun, Sep 08, 2013 at 10:42:27AM -0700, Vincent Lejeune wrote:
> I reworked this patch a little and also refactored the MOV_IMM_* and CLAMP_R600
> handling code into the R600 post-isel hook in two other patches.
>
> Vincent
All three patches are:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
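
For anyone following along, the structural change these patches converge on is
that all of this folding now runs from a fixed-point loop in PostprocessISelDAG
instead of ad hoc inside Select(). A rough, self-contained sketch of that loop
is below -- plain C++ with made-up Node/FoldNode stand-ins, not the real
SelectionDAG API; in the actual patch, PostISelFolding plays the role of
FoldNode and CurDAG->RemoveDeadNodes() does the cleanup:

  #include <algorithm>
  #include <functional>
  #include <vector>

  // Stand-in for a selected machine node; Dead marks nodes left without users.
  struct Node {
    unsigned Opcode;
    bool Dead = false;
  };

  // Re-run the per-node folding hook over every node until a full sweep makes
  // no change, then drop dead nodes (mirroring CurDAG->RemoveDeadNodes()).
  void postprocessISelDAG(std::vector<Node> &DAG,
                          const std::function<bool(Node &)> &FoldNode) {
    bool IsModified;
    do {
      IsModified = false;
      for (Node &N : DAG)
        if (FoldNode(N))   // analogous to ReplaceUses(Node, ResNode)
          IsModified = true;
      DAG.erase(std::remove_if(DAG.begin(), DAG.end(),
                               [](const Node &N) { return N.Dead; }),
                DAG.end());
    } while (IsModified);
  }

Iterating to a fixed point is what lets folds exposed by earlier folds be
picked up without special-casing each combination, at the cost of re-walking
the node list a few extra times.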
>
>
>
> ----- Original Message -----
> > From: Tom Stellard <tom at stellard.net>
> > To: Vincent Lejeune <vljn at ovi.com>
> > Cc: llvm-commits at cs.uiuc.edu
> > Sent: Thursday, September 5, 2013, 5:49 PM
> > Subject: Re: [PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel
> >
> > On Tue, Sep 03, 2013 at 01:11:35AM +0200, Vincent Lejeune wrote:
> >> This move makes it possible to correctly handle multiple instructions
> >> from a single pattern.
> >
> > Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> >
> >> ---
> >>  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262 +++------------------------------
> >>  lib/Target/R600/R600ISelLowering.cpp   | 178 ++++++++++++++++++++++
> >>  lib/Target/R600/R600ISelLowering.h     |   1 +
> >>  test/CodeGen/R600/complex-folding.ll   |  18 +++
> >>  4 files changed, 214 insertions(+), 245 deletions(-)
> >>  create mode 100644 test/CodeGen/R600/complex-folding.ll
> >>
> From 63e1c1bc74d517143a7a41eaf35953fabb962167 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Tue, 20 Aug 2013 00:47:24 +0200
> Subject: [PATCH 1/3] R600: Move fabs/fneg/sel folding logic into
> PostProcessIsel
>
> This move makes it possible to correctly handle multiple instructions
> from a single pattern.
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262 +++------------------------------
> lib/Target/R600/R600ISelLowering.cpp | 179 ++++++++++++++++++++++
> lib/Target/R600/R600ISelLowering.h | 1 +
> test/CodeGen/R600/complex-folding.ll | 18 +++
> 4 files changed, 215 insertions(+), 245 deletions(-)
> create mode 100644 test/CodeGen/R600/complex-folding.ll
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 46e50bc..85e1422 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -201,92 +201,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> }
> switch (Opc) {
> default: break;
> - case AMDGPUISD::CONST_ADDRESS: {
> - for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
> - I != SDNode::use_end(); I = Next) {
> - Next = llvm::next(I);
> - if (!I->isMachineOpcode()) {
> - continue;
> - }
> - unsigned Opcode = I->getMachineOpcode();
> - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> - int SrcIdx = I.getOperandNo();
> - int SelIdx;
> - // Unlike MachineInstrs, SDNodes do not have results in their operand
> - // list, so we need to increment the SrcIdx, since
> - // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
> - if (HasDst) {
> - SrcIdx++;
> - }
> -
> - SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
> - if (SelIdx < 0) {
> - continue;
> - }
> -
> - SDValue CstOffset;
> - if (N->getValueType(0).isVector() ||
> - !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
> - continue;
> -
> - // Gather constants values
> - int SrcIndices[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> - };
> - std::vector<unsigned> Consts;
> - for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
> - int OtherSrcIdx = SrcIndices[i];
> - int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
> - if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
> - continue;
> - }
> - if (HasDst) {
> - OtherSrcIdx--;
> - OtherSelIdx--;
> - }
> - if (RegisterSDNode *Reg =
> - dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
> - if (Reg->getReg() == AMDGPU::ALU_CONST) {
> - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
> - Consts.push_back(Cst->getZExtValue());
> - }
> - }
> - }
> -
> - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
> - Consts.push_back(Cst->getZExtValue());
> - if (!TII->fitsConstReadLimitations(Consts))
> - continue;
> -
> - // Convert back to SDNode indices
> - if (HasDst) {
> - SrcIdx--;
> - SelIdx--;
> - }
> - std::vector<SDValue> Ops;
> - for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
> - if (i == SrcIdx) {
> - Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
> - } else if (i == SelIdx) {
> - Ops.push_back(CstOffset);
> - } else {
> - Ops.push_back(I->getOperand(i));
> - }
> - }
> - CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
> - }
> - break;
> - }
> case ISD::BUILD_VECTOR: {
> unsigned RegClassID;
> const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> @@ -508,38 +422,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> const R600InstrInfo *TII =
> static_cast<const R600InstrInfo*>(TM.getInstrInfo());
> - if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
> - bool IsModified = false;
> - do {
> - std::vector<SDValue> Ops;
> - for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
> - I != E; ++I)
> - Ops.push_back(*I);
> - IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
> - if (IsModified) {
> - Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
> - }
> - } while (IsModified);
> -
> - }
> if (Result && Result->isMachineOpcode() &&
> !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
> && TII->hasInstrModifiers(Result->getMachineOpcode())) {
> - // Fold FNEG/FABS
> - // TODO: Isel can generate multiple MachineInst, we need to recursively
> - // parse Result
> - bool IsModified = false;
> - do {
> - std::vector<SDValue> Ops;
> - for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
> - I != E; ++I)
> - Ops.push_back(*I);
> - IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
> - if (IsModified) {
> - Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
> - }
> - } while (IsModified);
> -
> // If node has a single use which is CLAMP_R600, folds it
> if (Result->hasOneUse() && Result->isMachineOpcode()) {
> SDNode *PotentialClamp = *Result->use_begin();
> @@ -564,120 +449,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> return Result;
> }
>
> -bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
> - SDValue &Abs, const R600InstrInfo *TII) {
> - switch (Src.getOpcode()) {
> - case ISD::FNEG:
> - Src = Src.getOperand(0);
> - Neg = CurDAG->getTargetConstant(1, MVT::i32);
> - return true;
> - case ISD::FABS:
> - if (!Abs.getNode())
> - return false;
> - Src = Src.getOperand(0);
> - Abs = CurDAG->getTargetConstant(1, MVT::i32);
> - return true;
> - case ISD::BITCAST:
> - Src = Src.getOperand(0);
> - return true;
> - default:
> - return false;
> - }
> -}
> -
> -bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
> - const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
> - int OperandIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
> - };
> - int SelIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
> - };
> - int NegIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
> - };
> - int AbsIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
> - -1
> - };
> -
> -
> - for (unsigned i = 0; i < 3; i++) {
> - if (OperandIdx[i] < 0)
> - return false;
> - SDValue &Src = Ops[OperandIdx[i] - 1];
> - SDValue &Sel = Ops[SelIdx[i] - 1];
> - SDValue &Neg = Ops[NegIdx[i] - 1];
> - SDValue FakeAbs;
> - SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
> - if (FoldOperand(Src, Sel, Neg, Abs, TII))
> - return true;
> - }
> - return false;
> -}
> -
> -bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
> - const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
> - int OperandIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> - };
> - int SelIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
> - };
> - int NegIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
> - };
> - int AbsIdx[] = {
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
> - };
> -
> - for (unsigned i = 0; i < 8; i++) {
> - if (OperandIdx[i] < 0)
> - return false;
> - SDValue &Src = Ops[OperandIdx[i] - 1];
> - SDValue &Sel = Ops[SelIdx[i] - 1];
> - SDValue &Neg = Ops[NegIdx[i] - 1];
> - SDValue &Abs = Ops[AbsIdx[i] - 1];
> - if (FoldOperand(Src, Sel, Neg, Abs, TII))
> - return true;
> - }
> - return false;
> -}
>
> bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
> if (!ptr) {
> @@ -890,26 +661,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
> }
>
> void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
> -
> - if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> - return;
> - }
> -
> - // Go over all selected nodes and try to fold them a bit more
> const AMDGPUTargetLowering& Lowering =
> (*(const AMDGPUTargetLowering*)getTargetLowering());
> - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
> - E = CurDAG->allnodes_end(); I != E; ++I) {
> + bool IsModified = false;
> + do {
> + IsModified = false;
> + // Go over all selected nodes and try to fold them a bit more
> + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
> + E = CurDAG->allnodes_end(); I != E; ++I) {
>
> - SDNode *Node = I;
> + SDNode *Node = I;
>
> - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
> - if (!MachineNode)
> - continue;
> + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
> + if (!MachineNode)
> + continue;
>
> - SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
> - if (ResNode != Node) {
> - ReplaceUses(Node, ResNode);
> + SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
> + if (ResNode != Node) {
> + ReplaceUses(Node, ResNode);
> + IsModified = true;
> + }
> }
> - }
> + CurDAG->RemoveDeadNodes();
> + } while (IsModified);
> }
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index f0242b8..34a8506 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1629,3 +1629,182 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
> }
> return SDValue();
> }
> +
> +static bool
> +FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
> + SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
> + const R600InstrInfo *TII =
> + static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
> + if (!Src.isMachineOpcode())
> + return false;
> + switch (Src.getMachineOpcode()) {
> + case AMDGPU::FNEG_R600:
> + if (!Neg.getNode())
> + return false;
> + Src = Src.getOperand(0);
> + Neg = DAG.getTargetConstant(1, MVT::i32);
> + return true;
> + case AMDGPU::FABS_R600:
> + if (!Abs.getNode())
> + return false;
> + Src = Src.getOperand(0);
> + Abs = DAG.getTargetConstant(1, MVT::i32);
> + return true;
> + case AMDGPU::CONST_COPY: {
> + unsigned Opcode = ParentNode->getMachineOpcode();
> + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> +
> + if (!Sel.getNode())
> + return false;
> +
> + SDValue CstOffset = Src.getOperand(0);
> + if (ParentNode->getValueType(0).isVector())
> + return false;
> +
> + // Gather constants values
> + int SrcIndices[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> + };
> + std::vector<unsigned> Consts;
> + for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
> + int OtherSrcIdx = SrcIndices[i];
> + int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
> + if (OtherSrcIdx < 0 || OtherSelIdx < 0)
> + continue;
> + if (HasDst) {
> + OtherSrcIdx--;
> + OtherSelIdx--;
> + }
> + if (RegisterSDNode *Reg =
> + dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
> + if (Reg->getReg() == AMDGPU::ALU_CONST) {
> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
> + ParentNode->getOperand(OtherSelIdx));
> + Consts.push_back(Cst->getZExtValue());
> + }
> + }
> + }
> +
> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
> + Consts.push_back(Cst->getZExtValue());
> + if (!TII->fitsConstReadLimitations(Consts)) {
> + return false;
> + }
> +
> + Sel = CstOffset;
> + Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
> + return true;
> + }
> + default:
> + return false;
> + }
> +}
> +
> +
> +/// \brief Fold the instructions after selecting them
> +SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
> + SelectionDAG &DAG) const {
> + const R600InstrInfo *TII =
> + static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
> + if (!Node->isMachineOpcode())
> + return Node;
> + unsigned Opcode = Node->getMachineOpcode();
> + SDValue FakeOp;
> +
> + std::vector<SDValue> Ops;
> + for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
> + I != E; ++I)
> + Ops.push_back(*I);
> +
> + if (Opcode == AMDGPU::DOT_4) {
> + int OperandIdx[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> + };
> + int NegIdx[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
> + };
> + int AbsIdx[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
> + };
> + for (unsigned i = 0; i < 8; i++) {
> + if (OperandIdx[i] < 0)
> + return Node;
> + SDValue &Src = Ops[OperandIdx[i] - 1];
> + SDValue &Neg = Ops[NegIdx[i] - 1];
> + SDValue &Abs = Ops[AbsIdx[i] - 1];
> + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> + int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
> + if (HasDst)
> + SelIdx--;
> + SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> + }
> + } else {
> + if (!TII->hasInstrModifiers(Opcode))
> + return Node;
> + int OperandIdx[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
> + };
> + int NegIdx[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
> + };
> + int AbsIdx[] = {
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
> + -1
> + };
> + for (unsigned i = 0; i < 3; i++) {
> + if (OperandIdx[i] < 0)
> + return Node;
> + SDValue &Src = Ops[OperandIdx[i] - 1];
> + SDValue &Neg = Ops[NegIdx[i] - 1];
> + SDValue FakeAbs;
> + SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
> + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> + int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
> + if (HasDst)
> + SelIdx--;
> + SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> + }
> + }
> +
> + return Node;
> +}
> diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
> index a033fcb..93c3779 100644
> --- a/lib/Target/R600/R600ISelLowering.h
> +++ b/lib/Target/R600/R600ISelLowering.h
> @@ -68,6 +68,7 @@ private:
> void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
> unsigned &Channel, unsigned &PtrIncr) const;
> bool isZero(SDValue Op) const;
> + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
> };
>
> } // End namespace llvm;
> diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll
> new file mode 100644
> index 0000000..8dcd450
> --- /dev/null
> +++ b/test/CodeGen/R600/complex-folding.ll
> @@ -0,0 +1,18 @@
> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +
> +; CHECK: @main
> +; CHECK-NOT: MOV
> +define void @main() {
> +entry:
> + %0 = call float @llvm.R600.load.input(i32 0)
> + %1 = call float @fabs(float %0)
> + %2 = fptoui float %1 to i32
> + %3 = bitcast i32 %2 to float
> + %4 = insertelement <4 x float> undef, float %3, i32 0
> + call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
> + ret void
> +}
> +
> +declare float @llvm.R600.load.input(i32) readnone
> +declare float @fabs(float ) readnone
> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> \ No newline at end of file
> --
> 1.8.3.1
>
> From 2ca03e40f47caf30e8613891a2a57d84de01ab36 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sun, 8 Sep 2013 17:17:47 +0200
> Subject: [PATCH 2/3] R600: Move code handling literal folding into
> R600ISelLowering.
>
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 105 ---------------------------------
> lib/Target/R600/R600ISelLowering.cpp | 63 ++++++++++++++++++--
> test/CodeGen/R600/literals.ll | 16 +++++
> 3 files changed, 75 insertions(+), 109 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 85e1422..95037ba 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -193,8 +193,6 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
> }
>
> SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> - const R600InstrInfo *TII =
> - static_cast<const R600InstrInfo*>(TM.getInstrInfo());
> unsigned int Opc = N->getOpcode();
> if (N->isMachineOpcode()) {
> return NULL; // Already selected.
> @@ -310,109 +308,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
> SDLoc(N), N->getValueType(0), Ops);
> }
> -
> - case ISD::ConstantFP:
> - case ISD::Constant: {
> - const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> - // XXX: Custom immediate lowering not implemented yet. Instead we use
> - // pseudo instructions defined in SIInstructions.td
> - if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
> - break;
> - }
> -
> - uint64_t ImmValue = 0;
> - unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
> -
> - if (N->getOpcode() == ISD::ConstantFP) {
> - // XXX: 64-bit Immediates not supported yet
> - assert(N->getValueType(0) != MVT::f64);
> -
> - ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
> - APFloat Value = C->getValueAPF();
> - float FloatValue = Value.convertToFloat();
> - if (FloatValue == 0.0) {
> - ImmReg = AMDGPU::ZERO;
> - } else if (FloatValue == 0.5) {
> - ImmReg = AMDGPU::HALF;
> - } else if (FloatValue == 1.0) {
> - ImmReg = AMDGPU::ONE;
> - } else {
> - ImmValue = Value.bitcastToAPInt().getZExtValue();
> - }
> - } else {
> - // XXX: 64-bit Immediates not supported yet
> - assert(N->getValueType(0) != MVT::i64);
> -
> - ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
> - if (C->getZExtValue() == 0) {
> - ImmReg = AMDGPU::ZERO;
> - } else if (C->getZExtValue() == 1) {
> - ImmReg = AMDGPU::ONE_INT;
> - } else {
> - ImmValue = C->getZExtValue();
> - }
> - }
> -
> - for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
> - Use != SDNode::use_end(); Use = Next) {
> - Next = llvm::next(Use);
> - std::vector<SDValue> Ops;
> - for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
> - Ops.push_back(Use->getOperand(i));
> - }
> -
> - if (!Use->isMachineOpcode()) {
> - if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> - // We can only use literal constants (e.g. AMDGPU::ZERO,
> - // AMDGPU::ONE, etc) in machine opcodes.
> - continue;
> - }
> - } else {
> - switch(Use->getMachineOpcode()) {
> - case AMDGPU::REG_SEQUENCE: break;
> - default:
> - if (!TII->isALUInstr(Use->getMachineOpcode()) ||
> - (TII->get(Use->getMachineOpcode()).TSFlags &
> - R600_InstFlag::VECTOR)) {
> - continue;
> - }
> - }
> -
> - // Check that we aren't already using an immediate.
> - // XXX: It's possible for an instruction to have more than one
> - // immediate operand, but this is not supported yet.
> - if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> - int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
> - AMDGPU::OpName::literal);
> - if (ImmIdx == -1) {
> - continue;
> - }
> -
> - if (TII->getOperandIdx(Use->getMachineOpcode(),
> - AMDGPU::OpName::dst) != -1) {
> - // subtract one from ImmIdx, because the DST operand is usually index
> - // 0 for MachineInstrs, but we have no DST in the Ops vector.
> - ImmIdx--;
> - }
> - ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
> - assert(C);
> -
> - if (C->getZExtValue() != 0) {
> - // This instruction is already using an immediate.
> - continue;
> - }
> -
> - // Set the immediate value
> - Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
> - }
> - }
> - // Set the immediate register
> - Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
> -
> - CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
> - }
> - break;
> - }
> }
> SDNode *Result = SelectCode(N);
>
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index 34a8506..ab3911f 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1632,7 +1632,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
>
> static bool
> FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
> - SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
> + SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
> const R600InstrInfo *TII =
> static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
> if (!Src.isMachineOpcode())
> @@ -1705,6 +1705,51 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
> Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
> return true;
> }
> + case AMDGPU::MOV_IMM_I32:
> + case AMDGPU::MOV_IMM_F32: {
> + unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
> + uint64_t ImmValue = 0;
> +
> +
> + if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
> + ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
> + float FloatValue = FPC->getValueAPF().convertToFloat();
> + if (FloatValue == 0.0) {
> + ImmReg = AMDGPU::ZERO;
> + } else if (FloatValue == 0.5) {
> + ImmReg = AMDGPU::HALF;
> + } else if (FloatValue == 1.0) {
> + ImmReg = AMDGPU::ONE;
> + } else {
> + ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
> + }
> + } else {
> + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
> + uint64_t Value = C->getZExtValue();
> + if (Value == 0) {
> + ImmReg = AMDGPU::ZERO;
> + } else if (Value == 1) {
> + ImmReg = AMDGPU::ONE_INT;
> + } else {
> + ImmValue = Value;
> + }
> + }
> +
> + // Check that we aren't already using an immediate.
> + // XXX: It's possible for an instruction to have more than one
> + // immediate operand, but this is not supported yet.
> + if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> + if (!Imm.getNode())
> + return false;
> + ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
> + assert(C);
> + if (C->getZExtValue())
> + return false;
> + }
> + Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
> + Src = DAG.getRegister(ImmReg, MVT::i32);
> + return true;
> + }
> default:
> return false;
> }
> @@ -1768,7 +1813,13 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
> if (HasDst)
> SelIdx--;
> SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> - if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
> + return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> + }
> + } else if (Opcode == AMDGPU::REG_SEQUENCE) {
> + for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
> + SDValue &Src = Ops[i];
> + if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
> return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> }
> } else {
> @@ -1798,10 +1849,14 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
> SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
> bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
> - if (HasDst)
> + int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
> + if (HasDst) {
> SelIdx--;
> + ImmIdx--;
> + }
> SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> - if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> + SDValue &Imm = Ops[ImmIdx];
> + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
> return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> }
> }
> diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
> index e5bdbc4..47191e0 100644
> --- a/test/CodeGen/R600/literals.ll
> +++ b/test/CodeGen/R600/literals.ll
> @@ -46,3 +46,19 @@ entry:
> store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
> ret void
> }
> +
> +; CHECK: @inline_literal_dot4
> +; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
> +; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
> +; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
> +; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0
> +define void @inline_literal_dot4(float addrspace(1)* %out) {
> +entry:
> + %0 = call float @llvm.AMDGPU.dp4(<4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
> + store float %0, float addrspace(1)* %out
> + ret void
> +}
> +
> +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
> +
> +attributes #1 = { readnone }
> --
> 1.8.3.1
>
> From d81d8629e028564145b753c6fb1e370b641d5096 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sun, 8 Sep 2013 19:18:12 +0200
> Subject: [PATCH 3/3] R600: Move clamp handling code to R600ISelLowering.cpp
>
> ---
> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 34 +---------------------------------
> lib/Target/R600/R600ISelLowering.cpp | 16 ++++++++++++++++
> 2 files changed, 17 insertions(+), 33 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 95037ba..a008c96 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -309,39 +309,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> SDLoc(N), N->getValueType(0), Ops);
> }
> }
> - SDNode *Result = SelectCode(N);
> -
> - // Fold operands of selected node
> -
> - const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> - if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> - const R600InstrInfo *TII =
> - static_cast<const R600InstrInfo*>(TM.getInstrInfo());
> - if (Result && Result->isMachineOpcode() &&
> - !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
> - && TII->hasInstrModifiers(Result->getMachineOpcode())) {
> - // If node has a single use which is CLAMP_R600, folds it
> - if (Result->hasOneUse() && Result->isMachineOpcode()) {
> - SDNode *PotentialClamp = *Result->use_begin();
> - if (PotentialClamp->isMachineOpcode() &&
> - PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
> - unsigned ClampIdx =
> - TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
> - std::vector<SDValue> Ops;
> - unsigned NumOp = Result->getNumOperands();
> - for (unsigned i = 0; i < NumOp; ++i) {
> - Ops.push_back(Result->getOperand(i));
> - }
> - Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
> - Result = CurDAG->SelectNodeTo(PotentialClamp,
> - Result->getMachineOpcode(), PotentialClamp->getVTList(),
> - Ops.data(), NumOp);
> - }
> - }
> - }
> - }
> -
> - return Result;
> + return SelectCode(N);
> }
>
>
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index ab3911f..95ed144 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1822,6 +1822,22 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
> if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
> return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> }
> + } else if (Opcode == AMDGPU::CLAMP_R600) {
> + SDValue Src = Node->getOperand(0);
> + if (!Src.isMachineOpcode() ||
> + !TII->hasInstrModifiers(Src.getMachineOpcode()))
> + return Node;
> + int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
> + AMDGPU::OpName::clamp);
> + if (ClampIdx < 0)
> + return Node;
> + std::vector<SDValue> Ops;
> + unsigned NumOp = Src.getNumOperands();
> + for(unsigned i = 0; i < NumOp; ++i)
> + Ops.push_back(Src.getOperand(i));
> + Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
> + return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
> + Node->getVTList(), Ops);
> } else {
> if (!TII->hasInstrModifiers(Opcode))
> return Node;
> --
> 1.8.3.1
>
More information about the llvm-commits
mailing list