[PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel

Vincent Lejeune vljn at ovi.com
Sun Sep 8 10:42:27 PDT 2013


I reworked this patch a little and also refactored the MOV_IMM_* and CLAMP_R600
handling code into the R600 postIsel hook in 2 other patches.

Vincent



----- Mail original -----
> De : Tom Stellard <tom at stellard.net>
> À : Vincent Lejeune <vljn at ovi.com>
> Cc : llvm-commits at cs.uiuc.edu
> Envoyé le : Jeudi 5 septembre 2013 17h49
> Objet : Re: [PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel
> 
> On Tue, Sep 03, 2013 at 01:11:35AM +0200, Vincent Lejeune wrote:
>>  This move makes it possible to correctly handle multiple instructions
>>  from a single pattern.
> 
> Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> 
>>  ---
>>   lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262 
> +++------------------------------
>>   lib/Target/R600/R600ISelLowering.cpp   | 178 ++++++++++++++++++++++
>>   lib/Target/R600/R600ISelLowering.h     |   1 +
>>   test/CodeGen/R600/complex-folding.ll   |  18 +++
>>   4 files changed, 214 insertions(+), 245 deletions(-)
>>   create mode 100644 test/CodeGen/R600/complex-folding.ll
>> 
>>  diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp 
> b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>>  index f222901..e099282 100644
>>  --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>>  +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
>>  @@ -169,92 +169,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>>     }
>>     switch (Opc) {
>>     default: break;
>>  -  case AMDGPUISD::CONST_ADDRESS: {
>>  -    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
>>  -                              I != SDNode::use_end(); I = Next) {
>>  -      Next = llvm::next(I);
>>  -      if (!I->isMachineOpcode()) {
>>  -        continue;
>>  -      }
>>  -      unsigned Opcode = I->getMachineOpcode();
>>  -      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>>  -      int SrcIdx = I.getOperandNo();
>>  -      int SelIdx;
>>  -      // Unlike MachineInstrs, SDNodes do not have results in their 
> operand
>>  -      // list, so we need to increment the SrcIdx, since
>>  -      // R600InstrInfo::getOperandIdx is based on the MachineInstr 
> indices.
>>  -      if (HasDst) {
>>  -        SrcIdx++;
>>  -      }
>>  -
>>  -      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
>>  -      if (SelIdx < 0) {
>>  -        continue;
>>  -      }
>>  -
>>  -      SDValue CstOffset;
>>  -      if (N->getValueType(0).isVector() ||
>>  -          !SelectGlobalValueConstantOffset(N->getOperand(0), 
> CstOffset))
>>  -        continue;
>>  -
>>  -      // Gather constants values
>>  -      int SrcIndices[] = {
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>>  -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>>  -      };
>>  -      std::vector<unsigned> Consts;
>>  -      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) 
> {
>>  -        int OtherSrcIdx = SrcIndices[i];
>>  -        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
>>  -        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
>>  -          continue;
>>  -        }
>>  -        if (HasDst) {
>>  -          OtherSrcIdx--;
>>  -          OtherSelIdx--;
>>  -        }
>>  -        if (RegisterSDNode *Reg =
>>  -                        
> dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
>>  -          if (Reg->getReg() == AMDGPU::ALU_CONST) {
>>  -            ConstantSDNode *Cst = 
> dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
>>  -            Consts.push_back(Cst->getZExtValue());
>>  -          }
>>  -        }
>>  -      }
>>  -
>>  -      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
>>  -      Consts.push_back(Cst->getZExtValue());
>>  -      if (!TII->fitsConstReadLimitations(Consts))
>>  -        continue;
>>  -
>>  -      // Convert back to SDNode indices
>>  -      if (HasDst) {
>>  -        SrcIdx--;
>>  -        SelIdx--;
>>  -      }
>>  -      std::vector<SDValue> Ops;
>>  -      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
>>  -        if (i == SrcIdx) {
>>  -          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, 
> MVT::f32));
>>  -        } else if (i == SelIdx) {
>>  -          Ops.push_back(CstOffset);
>>  -        } else {
>>  -          Ops.push_back(I->getOperand(i));
>>  -        }
>>  -      }
>>  -      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
>>  -    }
>>  -    break;
>>  -  }
>>     case ISD::BUILD_VECTOR: {
>>       const AMDGPUSubtarget &ST = 
> TM.getSubtarget<AMDGPUSubtarget>();
>>       if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
>>  @@ -422,38 +336,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>>     if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
>>       const R600InstrInfo *TII =
>>           static_cast<const R600InstrInfo*>(TM.getInstrInfo());
>>  -    if (Result && Result->isMachineOpcode() && 
> Result->getMachineOpcode() == AMDGPU::DOT_4) {
>>  -      bool IsModified = false;
>>  -      do {
>>  -        std::vector<SDValue> Ops;
>>  -        for(SDNode::op_iterator I = Result->op_begin(), E = 
> Result->op_end();
>>  -            I != E; ++I)
>>  -          Ops.push_back(*I);
>>  -        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, 
> Ops);
>>  -        if (IsModified) {
>>  -          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), 
> Ops.size());
>>  -        }
>>  -      } while (IsModified);
>>  -
>>  -    }
>>       if (Result && Result->isMachineOpcode() &&
>>           !(TII->get(Result->getMachineOpcode()).TSFlags & 
> R600_InstFlag::VECTOR)
>>           && 
> TII->hasInstrModifiers(Result->getMachineOpcode())) {
>>  -      // Fold FNEG/FABS
>>  -      // TODO: Isel can generate multiple MachineInst, we need to 
> recursively
>>  -      // parse Result
>>  -      bool IsModified = false;
>>  -      do {
>>  -        std::vector<SDValue> Ops;
>>  -        for(SDNode::op_iterator I = Result->op_begin(), E = 
> Result->op_end();
>>  -            I != E; ++I)
>>  -          Ops.push_back(*I);
>>  -        IsModified = FoldOperands(Result->getMachineOpcode(), TII, 
> Ops);
>>  -        if (IsModified) {
>>  -          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), 
> Ops.size());
>>  -        }
>>  -      } while (IsModified);
>>  -
>>         // If node has a single use which is CLAMP_R600, folds it
>>         if (Result->hasOneUse() && Result->isMachineOpcode()) 
> {
>>           SDNode *PotentialClamp = *Result->use_begin();
>>  @@ -478,120 +363,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>>     return Result;
>>   }
>>   
>>  -bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, 
> SDValue &Neg,
>>  -                                     SDValue &Abs, const R600InstrInfo 
> *TII) {
>>  -  switch (Src.getOpcode()) {
>>  -  case ISD::FNEG:
>>  -    Src = Src.getOperand(0);
>>  -    Neg = CurDAG->getTargetConstant(1, MVT::i32);
>>  -    return true;
>>  -  case ISD::FABS:
>>  -    if (!Abs.getNode())
>>  -      return false;
>>  -    Src = Src.getOperand(0);
>>  -    Abs = CurDAG->getTargetConstant(1, MVT::i32);
>>  -    return true;
>>  -  case ISD::BITCAST:
>>  -    Src = Src.getOperand(0);
>>  -    return true;
>>  -  default:
>>  -    return false;
>>  -  }
>>  -}
>>  -
>>  -bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
>>  -    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
>>  -  int OperandIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
>>  -  };
>>  -  int SelIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
>>  -  };
>>  -  int NegIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
>>  -  };
>>  -  int AbsIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
>>  -    -1
>>  -  };
>>  -
>>  -
>>  -  for (unsigned i = 0; i < 3; i++) {
>>  -    if (OperandIdx[i] < 0)
>>  -      return false;
>>  -    SDValue &Src = Ops[OperandIdx[i] - 1];
>>  -    SDValue &Sel = Ops[SelIdx[i] - 1];
>>  -    SDValue &Neg = Ops[NegIdx[i] - 1];
>>  -    SDValue FakeAbs;
>>  -    SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
>>  -    if (FoldOperand(Src, Sel, Neg, Abs, TII))
>>  -      return true;
>>  -  }
>>  -  return false;
>>  -}
>>  -
>>  -bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
>>  -    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
>>  -  int OperandIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>>  -  };
>>  -  int SelIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
>>  -  };
>>  -  int NegIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
>>  -  };
>>  -  int AbsIdx[] = {
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
>>  -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
>>  -  };
>>  -
>>  -  for (unsigned i = 0; i < 8; i++) {
>>  -    if (OperandIdx[i] < 0)
>>  -      return false;
>>  -    SDValue &Src = Ops[OperandIdx[i] - 1];
>>  -    SDValue &Sel = Ops[SelIdx[i] - 1];
>>  -    SDValue &Neg = Ops[NegIdx[i] - 1];
>>  -    SDValue &Abs = Ops[AbsIdx[i] - 1];
>>  -    if (FoldOperand(Src, Sel, Neg, Abs, TII))
>>  -      return true;
>>  -  }
>>  -  return false;
>>  -}
>>   
>>   bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int 
> addrspace) {
>>     if (!ptr) {
>>  @@ -804,26 +575,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, 
> SDValue &U24) {
>>   }
>>   
>>   void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
>>  -
>>  -  if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) 
> {
>>  -    return;
>>  -  }
>>  -
>>  -  // Go over all selected nodes and try to fold them a bit more
>>     const AMDGPUTargetLowering& Lowering =
>>       (*(const AMDGPUTargetLowering*)getTargetLowering());
>>  -  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
>>  -       E = CurDAG->allnodes_end(); I != E; ++I) {
>>  +  bool IsModified = false;
>>  +  do {
>>  +    IsModified = false;
>>  +    // Go over all selected nodes and try to fold them a bit more
>>  +    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
>>  +         E = CurDAG->allnodes_end(); I != E; ++I) {
>>   
>>  -    SDNode *Node = I;
>>  +      SDNode *Node = I;
>>   
>>  -    MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
>>  -    if (!MachineNode)
>>  -      continue;
>>  +      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
>>  +      if (!MachineNode)
>>  +        continue;
>>   
>>  -    SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
>>  -    if (ResNode != Node) {
>>  -      ReplaceUses(Node, ResNode);
>>  +      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
>>  +      if (ResNode != Node) {
>>  +        ReplaceUses(Node, ResNode);
>>  +        IsModified = true;
>>  +      }
>>       }
>>  -  }
>>  +    CurDAG->RemoveDeadNodes();
>>  +  } while (IsModified);
>>   }
>>  diff --git a/lib/Target/R600/R600ISelLowering.cpp 
> b/lib/Target/R600/R600ISelLowering.cpp
>>  index a89875c..4f0c420 100644
>>  --- a/lib/Target/R600/R600ISelLowering.cpp
>>  +++ b/lib/Target/R600/R600ISelLowering.cpp
>>  @@ -1586,3 +1586,181 @@ SDValue 
> R600TargetLowering::PerformDAGCombine(SDNode *N,
>>     }
>>     return SDValue();
>>   }
>>  +
>>  +static bool
>>  +FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue 
> &Neg,
>>  +            SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) 
> {
>>  +  const R600InstrInfo *TII =
>>  +      static_cast<const R600InstrInfo 
> *>(DAG.getTarget().getInstrInfo());
>>  +  if (!Src.isMachineOpcode())
>>  +    return false;
>>  +  switch (Src.getMachineOpcode()) {
>>  +  case AMDGPU::FNEG_R600:
>>  +    Src = Src.getOperand(0);
>>  +    Neg = DAG.getTargetConstant(1, MVT::i32);
>>  +    return true;
>>  +  case AMDGPU::FABS_R600:
>>  +    if (!Abs.getNode())
>>  +      return false;
>>  +    Src = Src.getOperand(0);
>>  +    Abs = DAG.getTargetConstant(1, MVT::i32);
>>  +    return true;
>>  +  case AMDGPU::CONST_COPY: {
>>  +    unsigned Opcode = ParentNode->getMachineOpcode();
>>  +    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > 
> -1;
>>  +
>>  +    if (!Sel.getNode())
>>  +      return false;
>>  +
>>  +    SDValue CstOffset = Src.getOperand(0);
>>  +    if (ParentNode->getValueType(0).isVector())
>>  +      return false;
>>  +
>>  +    // Gather constants values
>>  +    int SrcIndices[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>>  +    };
>>  +    std::vector<unsigned> Consts;
>>  +    for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) 
> {
>>  +      int OtherSrcIdx = SrcIndices[i];
>>  +      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
>>  +      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
>>  +        continue;
>>  +      if (HasDst) {
>>  +        OtherSrcIdx--;
>>  +        OtherSelIdx--;
>>  +      }
>>  +      if (RegisterSDNode *Reg =
>>  +          
> dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
>>  +        if (Reg->getReg() == AMDGPU::ALU_CONST) {
>>  +          ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
>>  +              ParentNode->getOperand(OtherSelIdx));
>>  +          Consts.push_back(Cst->getZExtValue());
>>  +        }
>>  +      }
>>  +    }
>>  +
>>  +    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
>>  +    Consts.push_back(Cst->getZExtValue());
>>  +    if (!TII->fitsConstReadLimitations(Consts)) {
>>  +      return false;
>>  +    }
>>  +
>>  +    Sel = CstOffset;
>>  +    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
>>  +    return true;
>>  +  }
>>  +  default:
>>  +    return false;
>>  +  }
>>  +}
>>  +
>>  +
>>  +/// \brief Fold the instructions after selecting them
>>  +SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
>>  +                                            SelectionDAG &DAG) const 
> {
>>  +  const R600InstrInfo *TII =
>>  +      static_cast<const R600InstrInfo 
> *>(DAG.getTarget().getInstrInfo());
>>  +  if (!Node->isMachineOpcode())
>>  +    return Node;
>>  +  unsigned Opcode = Node->getMachineOpcode();
>>  +
>>  +  std::vector<SDValue> Ops;
>>  +  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
>>  +              I != E; ++I)
>>  +      Ops.push_back(*I);
>>  +
>>  +  if (Opcode == AMDGPU::DOT_4) {
>>  +    int OperandIdx[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
>>  +    };
>>  +    int NegIdx[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
>>  +    };
>>  +    int AbsIdx[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
>>  +    };
>>  +    for (unsigned i = 0; i < 8; i++) {
>>  +      if (OperandIdx[i] < 0)
>>  +        return Node;
>>  +      SDValue &Src = Ops[OperandIdx[i] - 1];
>>  +      SDValue &Neg = Ops[NegIdx[i] - 1];
>>  +      SDValue &Abs = Ops[AbsIdx[i] - 1];
>>  +      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>>  +      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
>>  +      if (HasDst)
>>  +        SelIdx--;
>>  +      SDValue FakeSel;
>>  +      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeSel;
>>  +      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
>>  +        return DAG.getMachineNode(Opcode, SDLoc(Node), 
> Node->getVTList(), Ops);
>>  +    }
>>  +  } else {
>>  +    if (!TII->hasInstrModifiers(Opcode))
>>  +      return Node;
>>  +    int OperandIdx[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
>>  +    };
>>  +    int NegIdx[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
>>  +    };
>>  +    int AbsIdx[] = {
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
>>  +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
>>  +      -1
>>  +    };
>>  +    for (unsigned i = 0; i < 3; i++) {
>>  +      if (OperandIdx[i] < 0)
>>  +        return Node;
>>  +      SDValue &Src = Ops[OperandIdx[i] - 1];
>>  +      SDValue &Neg = Ops[NegIdx[i] - 1];
>>  +      SDValue FakeAbs;
>>  +      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : 
> FakeAbs;
>>  +      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>>  +      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
>>  +      if (HasDst)
>>  +        SelIdx--;
>>  +      SDValue FakeSel;
>>  +      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeSel;
>>  +      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
>>  +        return DAG.getMachineNode(Opcode, SDLoc(Node), 
> Node->getVTList(), Ops);
>>  +    }
>>  +  }
>>  +
>>  +  return Node;
>>  +}
>>  diff --git a/lib/Target/R600/R600ISelLowering.h 
> b/lib/Target/R600/R600ISelLowering.h
>>  index a033fcb..93c3779 100644
>>  --- a/lib/Target/R600/R600ISelLowering.h
>>  +++ b/lib/Target/R600/R600ISelLowering.h
>>  @@ -68,6 +68,7 @@ private:
>>     void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
>>                          unsigned &Channel, unsigned &PtrIncr) 
> const;
>>     bool isZero(SDValue Op) const;
>>  +  virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) 
> const;
>>   };
>>   
>>   } // End namespace llvm;
>>  diff --git a/test/CodeGen/R600/complex-folding.ll 
> b/test/CodeGen/R600/complex-folding.ll
>>  new file mode 100644
>>  index 0000000..8dcd450
>>  --- /dev/null
>>  +++ b/test/CodeGen/R600/complex-folding.ll
>>  @@ -0,0 +1,18 @@
>>  +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>>  +
>>  +; CHECK: @main
>>  +; CHECK-NOT: MOV
>>  +define void @main() {
>>  +entry:
>>  +  %0 = call float @llvm.R600.load.input(i32 0)
>>  +  %1 = call float @fabs(float %0)
>>  +  %2 = fptoui float %1 to i32
>>  +  %3 = bitcast i32 %2 to float
>>  +  %4 = insertelement <4 x float> undef, float %3, i32 0
>>  +  call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
>>  +  ret void
>>  +}
>>  +
>>  +declare float @llvm.R600.load.input(i32) readnone
>>  +declare float @fabs(float ) readnone
>>  +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
>>  \ No newline at end of file
>>  -- 
>>  1.8.3.1
>> 
>>  _______________________________________________
>>  llvm-commits mailing list
>>  llvm-commits at cs.uiuc.edu
>>  http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-R600-Move-fabs-fneg-sel-folding-logic-into-PostProce.patch
Type: text/x-patch
Size: 20468 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130908/b95e7055/attachment.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0002-R600-Move-code-handling-literal-folding-into-R600ISe.patch
Type: text/x-patch
Size: 9718 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130908/b95e7055/attachment-0001.bin>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0003-R600-Move-clamp-handling-code-to-R600IselLowering.cp.patch
Type: text/x-patch
Size: 3429 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20130908/b95e7055/attachment-0002.bin>


More information about the llvm-commits mailing list