[PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel
Vincent Lejeune
vljn at ovi.com
Sun Sep 8 10:42:27 PDT 2013
I reworked this patch a little, and also refactored the MOV_IMM_* and CLAMP_R600
handling code into the R600 postIsel hook in 2 other patches.
Vincent
----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : Vincent Lejeune <vljn at ovi.com>
> Cc : llvm-commits at cs.uiuc.edu
> Envoyé le : Jeudi 5 septembre 2013 17h49
> Objet : Re: [PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel
>
> On Tue, Sep 03, 2013 at 01:11:35AM +0200, Vincent Lejeune wrote:
>> This move makes it possible to correctly handle multiple instructions
>> from a single pattern.
>
> Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
>
>> ---
>> lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262
> +++------------------------------
>> lib/Target/R600/R600ISelLowering.cpp | 178 ++++++++++++++++++++++
>> lib/Target/R600/R600ISelLowering.h | 1 +
>> test/CodeGen/R600/complex-folding.ll | 18 +++
>> 4 files changed, 214 insertions(+), 245 deletions(-)
>> create mode 100644 test/CodeGen/R600/complex-folding.ll
>>
>> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>> index f222901..e099282 100644
>> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>> @@ -169,92 +169,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>> }
>> switch (Opc) {
>> default: break;
>> - case AMDGPUISD::CONST_ADDRESS: {
>> - for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
>> - I != SDNode::use_end(); I = Next) {
>> - Next = llvm::next(I);
>> - if (!I->isMachineOpcode()) {
>> - continue;
>> - }
>> - unsigned Opcode = I->getMachineOpcode();
>> - bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>> - int SrcIdx = I.getOperandNo();
>> - int SelIdx;
>> - // Unlike MachineInstrs, SDNodes do not have results in their
> operand
>> - // list, so we need to increment the SrcIdx, since
>> - // R600InstrInfo::getOperandIdx is based on the MachineInstr
> indices.
>> - if (HasDst) {
>> - SrcIdx++;
>> - }
>> -
>> - SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
>> - if (SelIdx < 0) {
>> - continue;
>> - }
>> -
>> - SDValue CstOffset;
>> - if (N->getValueType(0).isVector() ||
>> - !SelectGlobalValueConstantOffset(N->getOperand(0),
> CstOffset))
>> - continue;
>> -
>> - // Gather constants values
>> - int SrcIndices[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>> - };
>> - std::vector<unsigned> Consts;
>> - for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++)
> {
>> - int OtherSrcIdx = SrcIndices[i];
>> - int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
>> - if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
>> - continue;
>> - }
>> - if (HasDst) {
>> - OtherSrcIdx--;
>> - OtherSelIdx--;
>> - }
>> - if (RegisterSDNode *Reg =
>> -
> dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
>> - if (Reg->getReg() == AMDGPU::ALU_CONST) {
>> - ConstantSDNode *Cst =
> dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
>> - Consts.push_back(Cst->getZExtValue());
>> - }
>> - }
>> - }
>> -
>> - ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
>> - Consts.push_back(Cst->getZExtValue());
>> - if (!TII->fitsConstReadLimitations(Consts))
>> - continue;
>> -
>> - // Convert back to SDNode indices
>> - if (HasDst) {
>> - SrcIdx--;
>> - SelIdx--;
>> - }
>> - std::vector<SDValue> Ops;
>> - for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
>> - if (i == SrcIdx) {
>> - Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST,
> MVT::f32));
>> - } else if (i == SelIdx) {
>> - Ops.push_back(CstOffset);
>> - } else {
>> - Ops.push_back(I->getOperand(i));
>> - }
>> - }
>> - CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
>> - }
>> - break;
>> - }
>> case ISD::BUILD_VECTOR: {
>> const AMDGPUSubtarget &ST =
> TM.getSubtarget<AMDGPUSubtarget>();
>> if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
>> @@ -422,38 +336,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>> if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
>> const R600InstrInfo *TII =
>> static_cast<const R600InstrInfo*>(TM.getInstrInfo());
>> - if (Result && Result->isMachineOpcode() &&
> Result->getMachineOpcode() == AMDGPU::DOT_4) {
>> - bool IsModified = false;
>> - do {
>> - std::vector<SDValue> Ops;
>> - for(SDNode::op_iterator I = Result->op_begin(), E =
> Result->op_end();
>> - I != E; ++I)
>> - Ops.push_back(*I);
>> - IsModified = FoldDotOperands(Result->getMachineOpcode(), TII,
> Ops);
>> - if (IsModified) {
>> - Result = CurDAG->UpdateNodeOperands(Result, Ops.data(),
> Ops.size());
>> - }
>> - } while (IsModified);
>> -
>> - }
>> if (Result && Result->isMachineOpcode() &&
>> !(TII->get(Result->getMachineOpcode()).TSFlags &
> R600_InstFlag::VECTOR)
>> &&
> TII->hasInstrModifiers(Result->getMachineOpcode())) {
>> - // Fold FNEG/FABS
>> - // TODO: Isel can generate multiple MachineInst, we need to
> recursively
>> - // parse Result
>> - bool IsModified = false;
>> - do {
>> - std::vector<SDValue> Ops;
>> - for(SDNode::op_iterator I = Result->op_begin(), E =
> Result->op_end();
>> - I != E; ++I)
>> - Ops.push_back(*I);
>> - IsModified = FoldOperands(Result->getMachineOpcode(), TII,
> Ops);
>> - if (IsModified) {
>> - Result = CurDAG->UpdateNodeOperands(Result, Ops.data(),
> Ops.size());
>> - }
>> - } while (IsModified);
>> -
>> // If node has a single use which is CLAMP_R600, folds it
>> if (Result->hasOneUse() && Result->isMachineOpcode())
> {
>> SDNode *PotentialClamp = *Result->use_begin();
>> @@ -478,120 +363,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>> return Result;
>> }
>>
>> -bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel,
> SDValue &Neg,
>> - SDValue &Abs, const R600InstrInfo
> *TII) {
>> - switch (Src.getOpcode()) {
>> - case ISD::FNEG:
>> - Src = Src.getOperand(0);
>> - Neg = CurDAG->getTargetConstant(1, MVT::i32);
>> - return true;
>> - case ISD::FABS:
>> - if (!Abs.getNode())
>> - return false;
>> - Src = Src.getOperand(0);
>> - Abs = CurDAG->getTargetConstant(1, MVT::i32);
>> - return true;
>> - case ISD::BITCAST:
>> - Src = Src.getOperand(0);
>> - return true;
>> - default:
>> - return false;
>> - }
>> -}
>> -
>> -bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
>> - const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
>> - int OperandIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
>> - };
>> - int SelIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
>> - };
>> - int NegIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
>> - };
>> - int AbsIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
>> - -1
>> - };
>> -
>> -
>> - for (unsigned i = 0; i < 3; i++) {
>> - if (OperandIdx[i] < 0)
>> - return false;
>> - SDValue &Src = Ops[OperandIdx[i] - 1];
>> - SDValue &Sel = Ops[SelIdx[i] - 1];
>> - SDValue &Neg = Ops[NegIdx[i] - 1];
>> - SDValue FakeAbs;
>> - SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
>> - if (FoldOperand(Src, Sel, Neg, Abs, TII))
>> - return true;
>> - }
>> - return false;
>> -}
>> -
>> -bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
>> - const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
>> - int OperandIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>> - };
>> - int SelIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
>> - };
>> - int NegIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
>> - };
>> - int AbsIdx[] = {
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
>> - TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
>> - };
>> -
>> - for (unsigned i = 0; i < 8; i++) {
>> - if (OperandIdx[i] < 0)
>> - return false;
>> - SDValue &Src = Ops[OperandIdx[i] - 1];
>> - SDValue &Sel = Ops[SelIdx[i] - 1];
>> - SDValue &Neg = Ops[NegIdx[i] - 1];
>> - SDValue &Abs = Ops[AbsIdx[i] - 1];
>> - if (FoldOperand(Src, Sel, Neg, Abs, TII))
>> - return true;
>> - }
>> - return false;
>> -}
>>
>> bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int
> addrspace) {
>> if (!ptr) {
>> @@ -804,26 +575,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op,
> SDValue &U24) {
>> }
>>
>> void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
>> -
>> - if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
> {
>> - return;
>> - }
>> -
>> - // Go over all selected nodes and try to fold them a bit more
>> const AMDGPUTargetLowering& Lowering =
>> (*(const AMDGPUTargetLowering*)getTargetLowering());
>> - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
>> - E = CurDAG->allnodes_end(); I != E; ++I) {
>> + bool IsModified = false;
>> + do {
>> + IsModified = false;
>> + // Go over all selected nodes and try to fold them a bit more
>> + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
>> + E = CurDAG->allnodes_end(); I != E; ++I) {
>>
>> - SDNode *Node = I;
>> + SDNode *Node = I;
>>
>> - MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
>> - if (!MachineNode)
>> - continue;
>> + MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
>> + if (!MachineNode)
>> + continue;
>>
>> - SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
>> - if (ResNode != Node) {
>> - ReplaceUses(Node, ResNode);
>> + SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
>> + if (ResNode != Node) {
>> + ReplaceUses(Node, ResNode);
>> + IsModified = true;
>> + }
>> }
>> - }
>> + CurDAG->RemoveDeadNodes();
>> + } while (IsModified);
>> }
>> diff --git a/lib/Target/R600/R600ISelLowering.cpp
> b/lib/Target/R600/R600ISelLowering.cpp
>> index a89875c..4f0c420 100644
>> --- a/lib/Target/R600/R600ISelLowering.cpp
>> +++ b/lib/Target/R600/R600ISelLowering.cpp
>> @@ -1586,3 +1586,181 @@ SDValue
> R600TargetLowering::PerformDAGCombine(SDNode *N,
>> }
>> return SDValue();
>> }
>> +
>> +static bool
>> +FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue
> &Neg,
>> + SDValue &Abs, SDValue &Sel, SelectionDAG &DAG)
> {
>> + const R600InstrInfo *TII =
>> + static_cast<const R600InstrInfo
> *>(DAG.getTarget().getInstrInfo());
>> + if (!Src.isMachineOpcode())
>> + return false;
>> + switch (Src.getMachineOpcode()) {
>> + case AMDGPU::FNEG_R600:
>> + Src = Src.getOperand(0);
>> + Neg = DAG.getTargetConstant(1, MVT::i32);
>> + return true;
>> + case AMDGPU::FABS_R600:
>> + if (!Abs.getNode())
>> + return false;
>> + Src = Src.getOperand(0);
>> + Abs = DAG.getTargetConstant(1, MVT::i32);
>> + return true;
>> + case AMDGPU::CONST_COPY: {
>> + unsigned Opcode = ParentNode->getMachineOpcode();
>> + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) >
> -1;
>> +
>> + if (!Sel.getNode())
>> + return false;
>> +
>> + SDValue CstOffset = Src.getOperand(0);
>> + if (ParentNode->getValueType(0).isVector())
>> + return false;
>> +
>> + // Gather constants values
>> + int SrcIndices[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>> + };
>> + std::vector<unsigned> Consts;
>> + for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++)
> {
>> + int OtherSrcIdx = SrcIndices[i];
>> + int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
>> + if (OtherSrcIdx < 0 || OtherSelIdx < 0)
>> + continue;
>> + if (HasDst) {
>> + OtherSrcIdx--;
>> + OtherSelIdx--;
>> + }
>> + if (RegisterSDNode *Reg =
>> +
> dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
>> + if (Reg->getReg() == AMDGPU::ALU_CONST) {
>> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
>> + ParentNode->getOperand(OtherSelIdx));
>> + Consts.push_back(Cst->getZExtValue());
>> + }
>> + }
>> + }
>> +
>> + ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
>> + Consts.push_back(Cst->getZExtValue());
>> + if (!TII->fitsConstReadLimitations(Consts)) {
>> + return false;
>> + }
>> +
>> + Sel = CstOffset;
>> + Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
>> + return true;
>> + }
>> + default:
>> + return false;
>> + }
>> +}
>> +
>> +
>> +/// \brief Fold the instructions after selecting them
>> +SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
>> + SelectionDAG &DAG) const
> {
>> + const R600InstrInfo *TII =
>> + static_cast<const R600InstrInfo
> *>(DAG.getTarget().getInstrInfo());
>> + if (!Node->isMachineOpcode())
>> + return Node;
>> + unsigned Opcode = Node->getMachineOpcode();
>> +
>> + std::vector<SDValue> Ops;
>> + for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
>> + I != E; ++I)
>> + Ops.push_back(*I);
>> +
>> + if (Opcode == AMDGPU::DOT_4) {
>> + int OperandIdx[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>> + };
>> + int NegIdx[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
>> + };
>> + int AbsIdx[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
>> + };
>> + for (unsigned i = 0; i < 8; i++) {
>> + if (OperandIdx[i] < 0)
>> + return Node;
>> + SDValue &Src = Ops[OperandIdx[i] - 1];
>> + SDValue &Neg = Ops[NegIdx[i] - 1];
>> + SDValue &Abs = Ops[AbsIdx[i] - 1];
>> + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>> + int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
>> + if (HasDst)
>> + SelIdx--;
>> + SDValue FakeSel;
>> + SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeSel;
>> + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
>> + return DAG.getMachineNode(Opcode, SDLoc(Node),
> Node->getVTList(), Ops);
>> + }
>> + } else {
>> + if (!TII->hasInstrModifiers(Opcode))
>> + return Node;
>> + int OperandIdx[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
>> + };
>> + int NegIdx[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
>> + };
>> + int AbsIdx[] = {
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
>> + TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
>> + -1
>> + };
>> + for (unsigned i = 0; i < 3; i++) {
>> + if (OperandIdx[i] < 0)
>> + return Node;
>> + SDValue &Src = Ops[OperandIdx[i] - 1];
>> + SDValue &Neg = Ops[NegIdx[i] - 1];
>> + SDValue FakeAbs;
>> + SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] :
> FakeAbs;
>> + bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>> + int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
>> + if (HasDst)
>> + SelIdx--;
>> + SDValue FakeSel;
>> + SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeSel;
>> + if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
>> + return DAG.getMachineNode(Opcode, SDLoc(Node),
> Node->getVTList(), Ops);
>> + }
>> + }
>> +
>> + return Node;
>> +}
>> diff --git a/lib/Target/R600/R600ISelLowering.h
> b/lib/Target/R600/R600ISelLowering.h
>> index a033fcb..93c3779 100644
>> --- a/lib/Target/R600/R600ISelLowering.h
>> +++ b/lib/Target/R600/R600ISelLowering.h
>> @@ -68,6 +68,7 @@ private:
>> void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
>> unsigned &Channel, unsigned &PtrIncr)
> const;
>> bool isZero(SDValue Op) const;
>> + virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG)
> const;
>> };
>>
>> } // End namespace llvm;
>> diff --git a/test/CodeGen/R600/complex-folding.ll
> b/test/CodeGen/R600/complex-folding.ll
>> new file mode 100644
>> index 0000000..8dcd450
>> --- /dev/null
>> +++ b/test/CodeGen/R600/complex-folding.ll
>> @@ -0,0 +1,18 @@
>> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>> +
>> +; CHECK: @main
>> +; CHECK-NOT: MOV
>> +define void @main() {
>> +entry:
>> + %0 = call float @llvm.R600.load.input(i32 0)
>> + %1 = call float @fabs(float %0)
>> + %2 = fptoui float %1 to i32
>> + %3 = bitcast i32 %2 to float
>> + %4 = insertelement <4 x float> undef, float %3, i32 0
>> + call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
>> + ret void
>> +}
>> +
>> +declare float @llvm.R600.load.input(i32) readnone
>> +declare float @fabs(float ) readnone
>> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
>> \ No newline at end of file
>> --
>> 1.8.3.1
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-R600-Move-fabs-fneg-sel-folding-logic-into-PostProce.patch
Type: text/x-patch
Size: 20468 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130908/b95e7055/attachment.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-R600-Move-code-handling-literal-folding-into-R600ISe.patch
Type: text/x-patch
Size: 9718 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130908/b95e7055/attachment-0001.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0003-R600-Move-clamp-handling-code-to-R600IselLowering.cp.patch
Type: text/x-patch
Size: 3429 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130908/b95e7055/attachment-0002.bin>
More information about the llvm-commits
mailing list