[PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel

Tom Stellard tom at stellard.net
Wed Sep 11 07:22:53 PDT 2013


On Sun, Sep 08, 2013 at 10:42:27AM -0700, Vincent Lejeune wrote:
> I reworked this patch a little and also refactored the MOV_IMM_* and CLAMP_R600
> handling code into the R600 post-isel hook in two other patches.
> 
> Vincent

All three patches are:

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

> 
> 
> 
> ----- Original Message -----
> > From: Tom Stellard <tom at stellard.net>
> > To: Vincent Lejeune <vljn at ovi.com>
> > Cc: llvm-commits at cs.uiuc.edu
> > Sent: Thursday, September 5, 2013, 5:49 PM
> > Subject: Re: [PATCH] R600: Move fabs/fneg folding logic into PostProcessIsel
> > 
> > On Tue, Sep 03, 2013 at 01:11:35AM +0200, Vincent Lejeune wrote:
> >>  This move makes it possible to correctly handle multiple instructions
> >>  from a single pattern.
> > 
> > Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
> > 

> From 63e1c1bc74d517143a7a41eaf35953fabb962167 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Tue, 20 Aug 2013 00:47:24 +0200
> Subject: [PATCH 1/3] R600: Move fabs/fneg/sel folding logic into
>  PostProcessIsel
> 
> This move makes it possible to correctly handle multiple instructions
> from a single pattern.
> ---
>  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 262 +++------------------------------
>  lib/Target/R600/R600ISelLowering.cpp   | 179 ++++++++++++++++++++++
>  lib/Target/R600/R600ISelLowering.h     |   1 +
>  test/CodeGen/R600/complex-folding.ll   |  18 +++
>  4 files changed, 215 insertions(+), 245 deletions(-)
>  create mode 100644 test/CodeGen/R600/complex-folding.ll
> 
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 46e50bc..85e1422 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -201,92 +201,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>    }
>    switch (Opc) {
>    default: break;
> -  case AMDGPUISD::CONST_ADDRESS: {
> -    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
> -                              I != SDNode::use_end(); I = Next) {
> -      Next = llvm::next(I);
> -      if (!I->isMachineOpcode()) {
> -        continue;
> -      }
> -      unsigned Opcode = I->getMachineOpcode();
> -      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> -      int SrcIdx = I.getOperandNo();
> -      int SelIdx;
> -      // Unlike MachineInstrs, SDNodes do not have results in their operand
> -      // list, so we need to increment the SrcIdx, since
> -      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
> -      if (HasDst) {
> -        SrcIdx++;
> -      }
> -
> -      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
> -      if (SelIdx < 0) {
> -        continue;
> -      }
> -
> -      SDValue CstOffset;
> -      if (N->getValueType(0).isVector() ||
> -          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
> -        continue;
> -
> -      // Gather constants values
> -      int SrcIndices[] = {
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> -        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> -      };
> -      std::vector<unsigned> Consts;
> -      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
> -        int OtherSrcIdx = SrcIndices[i];
> -        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
> -        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
> -          continue;
> -        }
> -        if (HasDst) {
> -          OtherSrcIdx--;
> -          OtherSelIdx--;
> -        }
> -        if (RegisterSDNode *Reg =
> -                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
> -          if (Reg->getReg() == AMDGPU::ALU_CONST) {
> -            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
> -            Consts.push_back(Cst->getZExtValue());
> -          }
> -        }
> -      }
> -
> -      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
> -      Consts.push_back(Cst->getZExtValue());
> -      if (!TII->fitsConstReadLimitations(Consts))
> -        continue;
> -
> -      // Convert back to SDNode indices
> -      if (HasDst) {
> -        SrcIdx--;
> -        SelIdx--;
> -      }
> -      std::vector<SDValue> Ops;
> -      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
> -        if (i == SrcIdx) {
> -          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
> -        } else if (i == SelIdx) {
> -          Ops.push_back(CstOffset);
> -        } else {
> -          Ops.push_back(I->getOperand(i));
> -        }
> -      }
> -      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
> -    }
> -    break;
> -  }
>    case ISD::BUILD_VECTOR: {
>      unsigned RegClassID;
>      const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> @@ -508,38 +422,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
>      const R600InstrInfo *TII =
>          static_cast<const R600InstrInfo*>(TM.getInstrInfo());
> -    if (Result && Result->isMachineOpcode() && Result->getMachineOpcode() == AMDGPU::DOT_4) {
> -      bool IsModified = false;
> -      do {
> -        std::vector<SDValue> Ops;
> -        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
> -            I != E; ++I)
> -          Ops.push_back(*I);
> -        IsModified = FoldDotOperands(Result->getMachineOpcode(), TII, Ops);
> -        if (IsModified) {
> -          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
> -        }
> -      } while (IsModified);
> -
> -    }
>      if (Result && Result->isMachineOpcode() &&
>          !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
>          && TII->hasInstrModifiers(Result->getMachineOpcode())) {
> -      // Fold FNEG/FABS
> -      // TODO: Isel can generate multiple MachineInst, we need to recursively
> -      // parse Result
> -      bool IsModified = false;
> -      do {
> -        std::vector<SDValue> Ops;
> -        for(SDNode::op_iterator I = Result->op_begin(), E = Result->op_end();
> -            I != E; ++I)
> -          Ops.push_back(*I);
> -        IsModified = FoldOperands(Result->getMachineOpcode(), TII, Ops);
> -        if (IsModified) {
> -          Result = CurDAG->UpdateNodeOperands(Result, Ops.data(), Ops.size());
> -        }
> -      } while (IsModified);
> -
>        // If node has a single use which is CLAMP_R600, folds it
>        if (Result->hasOneUse() && Result->isMachineOpcode()) {
>          SDNode *PotentialClamp = *Result->use_begin();
> @@ -564,120 +449,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>    return Result;
>  }
>  
> -bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
> -                                     SDValue &Abs, const R600InstrInfo *TII) {
> -  switch (Src.getOpcode()) {
> -  case ISD::FNEG:
> -    Src = Src.getOperand(0);
> -    Neg = CurDAG->getTargetConstant(1, MVT::i32);
> -    return true;
> -  case ISD::FABS:
> -    if (!Abs.getNode())
> -      return false;
> -    Src = Src.getOperand(0);
> -    Abs = CurDAG->getTargetConstant(1, MVT::i32);
> -    return true;
> -  case ISD::BITCAST:
> -    Src = Src.getOperand(0);
> -    return true;
> -  default:
> -    return false;
> -  }
> -}
> -
> -bool AMDGPUDAGToDAGISel::FoldOperands(unsigned Opcode,
> -    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
> -  int OperandIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
> -  };
> -  int SelIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_sel)
> -  };
> -  int NegIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
> -  };
> -  int AbsIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
> -    -1
> -  };
> -
> -
> -  for (unsigned i = 0; i < 3; i++) {
> -    if (OperandIdx[i] < 0)
> -      return false;
> -    SDValue &Src = Ops[OperandIdx[i] - 1];
> -    SDValue &Sel = Ops[SelIdx[i] - 1];
> -    SDValue &Neg = Ops[NegIdx[i] - 1];
> -    SDValue FakeAbs;
> -    SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
> -    if (FoldOperand(Src, Sel, Neg, Abs, TII))
> -      return true;
> -  }
> -  return false;
> -}
> -
> -bool AMDGPUDAGToDAGISel::FoldDotOperands(unsigned Opcode,
> -    const R600InstrInfo *TII, std::vector<SDValue> &Ops) {
> -  int OperandIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> -  };
> -  int SelIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_sel_W),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_sel_W)
> -  };
> -  int NegIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
> -  };
> -  int AbsIdx[] = {
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
> -    TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
> -  };
> -
> -  for (unsigned i = 0; i < 8; i++) {
> -    if (OperandIdx[i] < 0)
> -      return false;
> -    SDValue &Src = Ops[OperandIdx[i] - 1];
> -    SDValue &Sel = Ops[SelIdx[i] - 1];
> -    SDValue &Neg = Ops[NegIdx[i] - 1];
> -    SDValue &Abs = Ops[AbsIdx[i] - 1];
> -    if (FoldOperand(Src, Sel, Neg, Abs, TII))
> -      return true;
> -  }
> -  return false;
> -}
>  
>  bool AMDGPUDAGToDAGISel::checkType(const Value *ptr, unsigned int addrspace) {
>    if (!ptr) {
> @@ -890,26 +661,27 @@ bool AMDGPUDAGToDAGISel::SelectU24(SDValue Op, SDValue &U24) {
>  }
>  
>  void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
> -
> -  if (Subtarget.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> -    return;
> -  }
> -
> -  // Go over all selected nodes and try to fold them a bit more
>    const AMDGPUTargetLowering& Lowering =
>      (*(const AMDGPUTargetLowering*)getTargetLowering());
> -  for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
> -       E = CurDAG->allnodes_end(); I != E; ++I) {
> +  bool IsModified = false;
> +  do {
> +    IsModified = false;
> +    // Go over all selected nodes and try to fold them a bit more
> +    for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
> +         E = CurDAG->allnodes_end(); I != E; ++I) {
>  
> -    SDNode *Node = I;
> +      SDNode *Node = I;
>  
> -    MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
> -    if (!MachineNode)
> -      continue;
> +      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(I);
> +      if (!MachineNode)
> +        continue;
>  
> -    SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
> -    if (ResNode != Node) {
> -      ReplaceUses(Node, ResNode);
> +      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
> +      if (ResNode != Node) {
> +        ReplaceUses(Node, ResNode);
> +        IsModified = true;
> +      }
>      }
> -  }
> +    CurDAG->RemoveDeadNodes();
> +  } while (IsModified);
>  }
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index f0242b8..34a8506 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1629,3 +1629,182 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
>    }
>    return SDValue();
>  }
> +
> +static bool
> +FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
> +            SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
> +  const R600InstrInfo *TII =
> +      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
> +  if (!Src.isMachineOpcode())
> +    return false;
> +  switch (Src.getMachineOpcode()) {
> +  case AMDGPU::FNEG_R600:
> +    if (!Neg.getNode())
> +      return false;
> +    Src = Src.getOperand(0);
> +    Neg = DAG.getTargetConstant(1, MVT::i32);
> +    return true;
> +  case AMDGPU::FABS_R600:
> +    if (!Abs.getNode())
> +      return false;
> +    Src = Src.getOperand(0);
> +    Abs = DAG.getTargetConstant(1, MVT::i32);
> +    return true;
> +  case AMDGPU::CONST_COPY: {
> +    unsigned Opcode = ParentNode->getMachineOpcode();
> +    bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> +
> +    if (!Sel.getNode())
> +      return false;
> +
> +    SDValue CstOffset = Src.getOperand(0);
> +    if (ParentNode->getValueType(0).isVector())
> +      return false;
> +
> +    // Gather constants values
> +    int SrcIndices[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> +    };
> +    std::vector<unsigned> Consts;
> +    for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
> +      int OtherSrcIdx = SrcIndices[i];
> +      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
> +      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
> +        continue;
> +      if (HasDst) {
> +        OtherSrcIdx--;
> +        OtherSelIdx--;
> +      }
> +      if (RegisterSDNode *Reg =
> +          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
> +        if (Reg->getReg() == AMDGPU::ALU_CONST) {
> +          ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(
> +              ParentNode->getOperand(OtherSelIdx));
> +          Consts.push_back(Cst->getZExtValue());
> +        }
> +      }
> +    }
> +
> +    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
> +    Consts.push_back(Cst->getZExtValue());
> +    if (!TII->fitsConstReadLimitations(Consts)) {
> +      return false;
> +    }
> +
> +    Sel = CstOffset;
> +    Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
> +    return true;
> +  }
> +  default:
> +    return false;
> +  }
> +}
> +
> +
> +/// \brief Fold the instructions after selecting them
> +SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
> +                                            SelectionDAG &DAG) const {
> +  const R600InstrInfo *TII =
> +      static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
> +  if (!Node->isMachineOpcode())
> +    return Node;
> +  unsigned Opcode = Node->getMachineOpcode();
> +  SDValue FakeOp;
> +
> +  std::vector<SDValue> Ops;
> +  for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end();
> +              I != E; ++I)
> +	  Ops.push_back(*I);
> +
> +  if (Opcode == AMDGPU::DOT_4) {
> +    int OperandIdx[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
> +	};
> +    int NegIdx[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
> +    };
> +    int AbsIdx[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
> +    };
> +    for (unsigned i = 0; i < 8; i++) {
> +      if (OperandIdx[i] < 0)
> +        return Node;
> +      SDValue &Src = Ops[OperandIdx[i] - 1];
> +      SDValue &Neg = Ops[NegIdx[i] - 1];
> +      SDValue &Abs = Ops[AbsIdx[i] - 1];
> +      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> +      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
> +      if (HasDst)
> +        SelIdx--;
> +      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> +      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> +        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> +    }
> +  } else {
> +    if (!TII->hasInstrModifiers(Opcode))
> +      return Node;
> +    int OperandIdx[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
> +    };
> +    int NegIdx[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
> +    };
> +    int AbsIdx[] = {
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
> +      TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
> +      -1
> +    };
> +    for (unsigned i = 0; i < 3; i++) {
> +      if (OperandIdx[i] < 0)
> +        return Node;
> +      SDValue &Src = Ops[OperandIdx[i] - 1];
> +      SDValue &Neg = Ops[NegIdx[i] - 1];
> +      SDValue FakeAbs;
> +      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
> +      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
> +      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
> +      if (HasDst)
> +        SelIdx--;
> +      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> +      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> +        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> +    }
> +  }
> +
> +  return Node;
> +}
> diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h
> index a033fcb..93c3779 100644
> --- a/lib/Target/R600/R600ISelLowering.h
> +++ b/lib/Target/R600/R600ISelLowering.h
> @@ -68,6 +68,7 @@ private:
>    void getStackAddress(unsigned StackWidth, unsigned ElemIdx,
>                         unsigned &Channel, unsigned &PtrIncr) const;
>    bool isZero(SDValue Op) const;
> +  virtual SDNode *PostISelFolding(MachineSDNode *N, SelectionDAG &DAG) const;
>  };
>  
>  } // End namespace llvm;
> diff --git a/test/CodeGen/R600/complex-folding.ll b/test/CodeGen/R600/complex-folding.ll
> new file mode 100644
> index 0000000..8dcd450
> --- /dev/null
> +++ b/test/CodeGen/R600/complex-folding.ll
> @@ -0,0 +1,18 @@
> +;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
> +
> +; CHECK: @main
> +; CHECK-NOT: MOV
> +define void @main() {
> +entry:
> +  %0 = call float @llvm.R600.load.input(i32 0)
> +  %1 = call float @fabs(float %0)
> +  %2 = fptoui float %1 to i32
> +  %3 = bitcast i32 %2 to float
> +  %4 = insertelement <4 x float> undef, float %3, i32 0
> +  call void @llvm.R600.store.swizzle(<4 x float> %4, i32 0, i32 0)
> +  ret void
> +}
> +
> +declare float @llvm.R600.load.input(i32) readnone
> +declare float @fabs(float ) readnone
> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> \ No newline at end of file
> -- 
> 1.8.3.1
> 
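A side note on this first patch for readers who skim the diff: because PostprocessISelDAG now re-runs the folding until a whole pass makes no change (calling CurDAG->RemoveDeadNodes() between rounds), a fold that only becomes possible after another fold, say an FABS_R600 exposed once the FNEG_R600 wrapping it has been absorbed, is still caught. That is what lets one pattern that expands to several machine nodes be handled correctly. The standalone C++ sketch below models just that fixed-point idea with toy structs; none of these types are LLVM's, so treat it as an illustration of the loop structure rather than code from the patch.

// Toy model of the fixed-point folding done in PostprocessISelDAG.  The types
// here are placeholders and have nothing to do with the real SelectionDAG.
#include <cstdio>
#include <vector>

enum Kind { Leaf, NegWrap, AbsWrap, ALU };

struct Node {
  Kind K;
  int Operand;   // index of the node this one reads, or -1 for a leaf
  bool NegBit;   // source modifier bits, like the ones on R600 ALU instructions
  bool AbsBit;
};

// One folding pass: let an ALU node absorb a single neg/abs wrapper on its
// source as a modifier bit.  Returns true if anything changed.
static bool foldOnce(std::vector<Node> &Nodes) {
  for (Node &N : Nodes) {
    if (N.K != ALU || N.Operand < 0)
      continue;
    const Node &Src = Nodes[N.Operand];
    if (Src.K == NegWrap) {
      N.NegBit = !N.NegBit;
      N.Operand = Src.Operand;
      return true;
    }
    if (Src.K == AbsWrap) {
      N.AbsBit = true;
      N.Operand = Src.Operand;
      return true;
    }
  }
  return false;
}

int main() {
  // alu(3) reads neg(2), which reads abs(1), which reads leaf(0): two
  // successive folds are needed, so a single pass over the nodes is not enough.
  std::vector<Node> Nodes = {
    {Leaf,    -1, false, false},
    {AbsWrap,  0, false, false},
    {NegWrap,  1, false, false},
    {ALU,      2, false, false},
  };
  while (foldOnce(Nodes))   // iterate to a fixed point
    ;
  std::printf("ALU node now reads node %d with neg=%d abs=%d\n",
              Nodes[3].Operand, (int)Nodes[3].NegBit, (int)Nodes[3].AbsBit);
  return 0;
}

Running it shows the ALU node ending up reading the leaf directly with both modifier bits set, which a single pass over the node list would not achieve.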

> From 2ca03e40f47caf30e8613891a2a57d84de01ab36 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sun, 8 Sep 2013 17:17:47 +0200
> Subject: [PATCH 2/3] R600: Move code handling literal folding into
>  R600ISelLowering.
> 
> ---
>  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 105 ---------------------------------
>  lib/Target/R600/R600ISelLowering.cpp   |  63 ++++++++++++++++++--
>  test/CodeGen/R600/literals.ll          |  16 +++++
>  3 files changed, 75 insertions(+), 109 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 85e1422..95037ba 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -193,8 +193,6 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SDValue Addr, SDValue& R1, SDValue& R2) {
>  }
>  
>  SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
> -  const R600InstrInfo *TII =
> -                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
>    unsigned int Opc = N->getOpcode();
>    if (N->isMachineOpcode()) {
>      return NULL;   // Already selected.
> @@ -310,109 +308,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>      return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
>                                    SDLoc(N), N->getValueType(0), Ops);
>    }
> -
> -  case ISD::ConstantFP:
> -  case ISD::Constant: {
> -    const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> -    // XXX: Custom immediate lowering not implemented yet.  Instead we use
> -    // pseudo instructions defined in SIInstructions.td
> -    if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
> -      break;
> -    }
> -
> -    uint64_t ImmValue = 0;
> -    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
> -
> -    if (N->getOpcode() == ISD::ConstantFP) {
> -      // XXX: 64-bit Immediates not supported yet
> -      assert(N->getValueType(0) != MVT::f64);
> -
> -      ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N);
> -      APFloat Value = C->getValueAPF();
> -      float FloatValue = Value.convertToFloat();
> -      if (FloatValue == 0.0) {
> -        ImmReg = AMDGPU::ZERO;
> -      } else if (FloatValue == 0.5) {
> -        ImmReg = AMDGPU::HALF;
> -      } else if (FloatValue == 1.0) {
> -        ImmReg = AMDGPU::ONE;
> -      } else {
> -        ImmValue = Value.bitcastToAPInt().getZExtValue();
> -      }
> -    } else {
> -      // XXX: 64-bit Immediates not supported yet
> -      assert(N->getValueType(0) != MVT::i64);
> -
> -      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
> -      if (C->getZExtValue() == 0) {
> -        ImmReg = AMDGPU::ZERO;
> -      } else if (C->getZExtValue() == 1) {
> -        ImmReg = AMDGPU::ONE_INT;
> -      } else {
> -        ImmValue = C->getZExtValue();
> -      }
> -    }
> -
> -    for (SDNode::use_iterator Use = N->use_begin(), Next = llvm::next(Use);
> -                              Use != SDNode::use_end(); Use = Next) {
> -      Next = llvm::next(Use);
> -      std::vector<SDValue> Ops;
> -      for (unsigned i = 0; i < Use->getNumOperands(); ++i) {
> -        Ops.push_back(Use->getOperand(i));
> -      }
> -
> -      if (!Use->isMachineOpcode()) {
> -          if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> -            // We can only use literal constants (e.g. AMDGPU::ZERO,
> -            // AMDGPU::ONE, etc) in machine opcodes.
> -            continue;
> -          }
> -      } else {
> -        switch(Use->getMachineOpcode()) {
> -        case AMDGPU::REG_SEQUENCE: break;
> -        default:
> -          if (!TII->isALUInstr(Use->getMachineOpcode()) ||
> -              (TII->get(Use->getMachineOpcode()).TSFlags &
> -               R600_InstFlag::VECTOR)) {
> -            continue;
> -          }
> -        }
> -
> -        // Check that we aren't already using an immediate.
> -        // XXX: It's possible for an instruction to have more than one
> -        // immediate operand, but this is not supported yet.
> -        if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> -          int ImmIdx = TII->getOperandIdx(Use->getMachineOpcode(),
> -                                          AMDGPU::OpName::literal);
> -          if (ImmIdx == -1) {
> -            continue;
> -          }
> -
> -          if (TII->getOperandIdx(Use->getMachineOpcode(),
> -                                 AMDGPU::OpName::dst) != -1) {
> -            // subtract one from ImmIdx, because the DST operand is usually index
> -            // 0 for MachineInstrs, but we have no DST in the Ops vector.
> -            ImmIdx--;
> -          }
> -          ConstantSDNode *C = dyn_cast<ConstantSDNode>(Use->getOperand(ImmIdx));
> -          assert(C);
> -
> -          if (C->getZExtValue() != 0) {
> -            // This instruction is already using an immediate.
> -            continue;
> -          }
> -
> -          // Set the immediate value
> -          Ops[ImmIdx] = CurDAG->getTargetConstant(ImmValue, MVT::i32);
> -        }
> -      }
> -      // Set the immediate register
> -      Ops[Use.getOperandNo()] = CurDAG->getRegister(ImmReg, MVT::i32);
> -
> -      CurDAG->UpdateNodeOperands(*Use, Ops.data(), Use->getNumOperands());
> -    }
> -    break;
> -  }
>    }
>    SDNode *Result = SelectCode(N);
>  
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index 34a8506..ab3911f 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1632,7 +1632,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
>  
>  static bool
>  FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
> -            SDValue &Abs, SDValue &Sel, SelectionDAG &DAG) {
> +            SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
>    const R600InstrInfo *TII =
>        static_cast<const R600InstrInfo *>(DAG.getTarget().getInstrInfo());
>    if (!Src.isMachineOpcode())
> @@ -1705,6 +1705,51 @@ FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
>      Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
>      return true;
>    }
> +  case AMDGPU::MOV_IMM_I32:
> +  case AMDGPU::MOV_IMM_F32: {
> +    unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
> +    uint64_t ImmValue = 0;
> +
> +
> +    if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
> +      ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
> +      float FloatValue = FPC->getValueAPF().convertToFloat();
> +      if (FloatValue == 0.0) {
> +        ImmReg = AMDGPU::ZERO;
> +      } else if (FloatValue == 0.5) {
> +        ImmReg = AMDGPU::HALF;
> +      } else if (FloatValue == 1.0) {
> +        ImmReg = AMDGPU::ONE;
> +      } else {
> +        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
> +      }
> +    } else {
> +      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
> +      uint64_t Value = C->getZExtValue();
> +      if (Value == 0) {
> +        ImmReg = AMDGPU::ZERO;
> +      } else if (Value == 1) {
> +        ImmReg = AMDGPU::ONE_INT;
> +      } else {
> +        ImmValue = Value;
> +      }
> +    }
> +
> +    // Check that we aren't already using an immediate.
> +    // XXX: It's possible for an instruction to have more than one
> +    // immediate operand, but this is not supported yet.
> +    if (ImmReg == AMDGPU::ALU_LITERAL_X) {
> +      if (!Imm.getNode())
> +        return false;
> +      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
> +      assert(C);
> +      if (C->getZExtValue())
> +        return false;
> +    }
> +    Imm = DAG.getTargetConstant(ImmValue, MVT::i32);
> +    Src = DAG.getRegister(ImmReg, MVT::i32);
> +    return true;
> +  }
>    default:
>      return false;
>    }
> @@ -1768,7 +1813,13 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
>        if (HasDst)
>          SelIdx--;
>        SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> -      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> +      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
> +        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
> +    }
> +  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
> +    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
> +      SDValue &Src = Ops[i];
> +      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
>          return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
>      }
>    } else {
> @@ -1798,10 +1849,14 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
>        SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
>        bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
>        int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
> -      if (HasDst)
> +      int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
> +      if (HasDst) {
>          SelIdx--;
> +        ImmIdx--;
> +      }
>        SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
> -      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, DAG))
> +      SDValue &Imm = Ops[ImmIdx];
> +      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
>          return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
>      }
>    }
> diff --git a/test/CodeGen/R600/literals.ll b/test/CodeGen/R600/literals.ll
> index e5bdbc4..47191e0 100644
> --- a/test/CodeGen/R600/literals.ll
> +++ b/test/CodeGen/R600/literals.ll
> @@ -46,3 +46,19 @@ entry:
>    store <4 x i32> <i32 0, i32 0, i32 0, i32 0>, <4 x i32> addrspace(1)* %out
>    ret void
>  }
> +
> +; CHECK: @inline_literal_dot4
> +; CHECK: DOT4 T[[GPR:[0-9]]].X, 1.0
> +; CHECK-NEXT: DOT4 T[[GPR]].Y (MASKED), 1.0
> +; CHECK-NEXT: DOT4 T[[GPR]].Z (MASKED), 1.0
> +; CHECK-NEXT: DOT4 * T[[GPR]].W (MASKED), 1.0
> +define void @inline_literal_dot4(float addrspace(1)* %out) {
> +entry:
> +  %0 = call float @llvm.AMDGPU.dp4(<4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>)
> +  store float %0, float addrspace(1)* %out
> +  ret void
> +}
> +
> +declare float @llvm.AMDGPU.dp4(<4 x float>, <4 x float>) #1
> +
> +attributes #1 = { readnone }
> -- 
> 1.8.3.1
> 
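For context on this second patch: the logic it moves into FoldOperand decides, for each immediate, whether the value can use one of the free inline constant registers (0.0, 0.5 and 1.0 for floats, 0 and 1 for integers) or has to occupy the instruction's single 32-bit literal slot. Below is a small self-contained sketch of that classification rule; the register names are plain strings standing in for AMDGPU::ZERO, HALF, ONE, ONE_INT and ALU_LITERAL_X, so it is an illustration rather than code from the patch.

// Standalone sketch of the R600 immediate classification; not LLVM code.
#include <cstdint>
#include <cstdio>
#include <cstring>

struct Classified {
  const char *Reg;        // inline register name, or "ALU_LITERAL_X"
  std::uint32_t Literal;  // payload, only meaningful for the literal register
};

static Classified classifyFloatImm(float F) {
  if (F == 0.0f) return {"ZERO", 0};
  if (F == 0.5f) return {"HALF", 0};
  if (F == 1.0f) return {"ONE", 0};
  std::uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));  // bit pattern, as MOV_IMM_F32 folding uses
  return {"ALU_LITERAL_X", Bits};
}

static Classified classifyIntImm(std::uint32_t I) {
  if (I == 0) return {"ZERO", 0};
  if (I == 1) return {"ONE_INT", 0};
  return {"ALU_LITERAL_X", I};
}

int main() {
  const float Tests[] = {0.0f, 0.5f, 1.0f, 2.5f};
  for (float F : Tests) {
    Classified C = classifyFloatImm(F);
    std::printf("%4.2f -> %-13s payload 0x%08x\n", F, C.Reg, (unsigned)C.Literal);
  }
  Classified C = classifyIntImm(7);
  std::printf("   7 -> %-13s payload 0x%08x\n", C.Reg, (unsigned)C.Literal);
  return 0;
}

This is also what the new literals.ll test exercises: the 1.0 operands of llvm.AMDGPU.dp4 should fold straight into the DOT4 slots as inline constants instead of going through separate MOVs.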

> From d81d8629e028564145b753c6fb1e370b641d5096 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Sun, 8 Sep 2013 19:18:12 +0200
> Subject: [PATCH 3/3] R600: Move clamp handling code to R600ISelLowering.cpp
> 
> ---
>  lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 34 +---------------------------------
>  lib/Target/R600/R600ISelLowering.cpp   | 16 ++++++++++++++++
>  2 files changed, 17 insertions(+), 33 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> index 95037ba..a008c96 100644
> --- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> +++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
> @@ -309,39 +309,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
>                                    SDLoc(N), N->getValueType(0), Ops);
>    }
>    }
> -  SDNode *Result = SelectCode(N);
> -
> -  // Fold operands of selected node
> -
> -  const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
> -  if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> -    const R600InstrInfo *TII =
> -        static_cast<const R600InstrInfo*>(TM.getInstrInfo());
> -    if (Result && Result->isMachineOpcode() &&
> -        !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
> -        && TII->hasInstrModifiers(Result->getMachineOpcode())) {
> -      // If node has a single use which is CLAMP_R600, folds it
> -      if (Result->hasOneUse() && Result->isMachineOpcode()) {
> -        SDNode *PotentialClamp = *Result->use_begin();
> -        if (PotentialClamp->isMachineOpcode() &&
> -            PotentialClamp->getMachineOpcode() == AMDGPU::CLAMP_R600) {
> -          unsigned ClampIdx =
> -            TII->getOperandIdx(Result->getMachineOpcode(), AMDGPU::OpName::clamp);
> -          std::vector<SDValue> Ops;
> -          unsigned NumOp = Result->getNumOperands();
> -          for (unsigned i = 0; i < NumOp; ++i) {
> -            Ops.push_back(Result->getOperand(i));
> -          }
> -          Ops[ClampIdx - 1] = CurDAG->getTargetConstant(1, MVT::i32);
> -          Result = CurDAG->SelectNodeTo(PotentialClamp,
> -              Result->getMachineOpcode(), PotentialClamp->getVTList(),
> -              Ops.data(), NumOp);
> -        }
> -      }
> -    }
> -  }
> -
> -  return Result;
> +  return SelectCode(N);
>  }
>  
>  
> diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> index ab3911f..95ed144 100644
> --- a/lib/Target/R600/R600ISelLowering.cpp
> +++ b/lib/Target/R600/R600ISelLowering.cpp
> @@ -1822,6 +1822,22 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
>        if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
>          return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
>      }
> +  } else if (Opcode == AMDGPU::CLAMP_R600) {
> +    SDValue Src = Node->getOperand(0);
> +    if (!Src.isMachineOpcode() ||
> +        !TII->hasInstrModifiers(Src.getMachineOpcode()))
> +      return Node;
> +    int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
> +        AMDGPU::OpName::clamp);
> +    if (ClampIdx < 0)
> +      return Node;
> +    std::vector<SDValue> Ops;
> +    unsigned NumOp = Src.getNumOperands();
> +    for(unsigned i = 0; i < NumOp; ++i)
> +  	  Ops.push_back(Src.getOperand(i));
> +    Ops[ClampIdx - 1] = DAG.getTargetConstant(1, MVT::i32);
> +    return DAG.getMachineNode(Src.getMachineOpcode(), SDLoc(Node),
> +        Node->getVTList(), Ops);
>    } else {
>      if (!TII->hasInstrModifiers(Opcode))
>        return Node;
> -- 
> 1.8.3.1
> 
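Finally, for the clamp patch, the rewrite now done in PostISelFolding amounts to: when the operand of a CLAMP_R600 comes from an instruction that carries a clamp modifier, re-emit that instruction with its clamp bit set and let the separate clamp node go dead. The toy function below mirrors that decision with placeholder structs; it is only a sketch of the idea, not the SelectionDAG code from the diff.

// Toy version of folding a clamp node into its defining ALU instruction.
// The types are placeholders, not LLVM's MachineSDNode/SelectionDAG.
#include <cstdio>

struct ToyInst {
  const char *Opcode;
  bool HasClampModifier;  // models "the instruction has a clamp operand"
  bool ClampBit;
};

// If the clamp's source supports a clamp modifier, return a copy of the source
// with the bit set (and report success); otherwise keep the clamp separate.
static ToyInst foldClamp(const ToyInst &Src, bool &Folded) {
  if (!Src.HasClampModifier) {
    Folded = false;
    return Src;
  }
  ToyInst Result = Src;
  Result.ClampBit = true;
  Folded = true;
  return Result;
}

int main() {
  ToyInst MulIEEE = {"MUL_IEEE", /*HasClampModifier=*/true, /*ClampBit=*/false};
  bool Folded = false;
  ToyInst Out = foldClamp(MulIEEE, Folded);
  std::printf("%s clamp=%d folded=%d\n", Out.Opcode, (int)Out.ClampBit, (int)Folded);
  return 0;
}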



