[llvm] r186919 - R600: Move CONST_ADDRESS folding into AMDGPUDAGToDAGISel::Select()

Tom Stellard thomas.stellard at amd.com
Mon Jul 22 18:48:24 PDT 2013


Author: tstellar
Date: Mon Jul 22 20:48:24 2013
New Revision: 186919

URL: http://llvm.org/viewvc/llvm-project?rev=186919&view=rev
Log:
R600: Move CONST_ADDRESS folding into AMDGPUDAGToDAGISel::Select()

This increases the number of opportunities we have for folding.  With the
previous implementation, we were unable to fold into any instruction other
than the first when multiple instructions were selected from a single
SDNode.

Reviewed-by: Vincent Lejeune <vljn at ovi.com>
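
In outline, the new handling works as sketched below: instead of attempting
the fold while visiting the user (which only ever reaches the first machine
node produced for an SDNode), Select() now visits the CONST_ADDRESS node
itself and rewrites each already-selected user in place.  This is a
condensed, illustrative rendering of the AMDILISelDAGToDAG.cpp hunk further
down (the dst-operand index adjustment and the fitsConstReadLimitations()
check are elided) and assumes the LLVM 3.4-era SelectionDAG API used in this
tree.

  // Condensed sketch of the new CONST_ADDRESS case in Select(); see the
  // full hunk below for the checks that are omitted here.
  case AMDGPUISD::CONST_ADDRESS: {
    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
         I != SDNode::use_end(); I = Next) {
      Next = llvm::next(I);
      if (!I->isMachineOpcode())
        continue;                               // User not selected yet.
      int SrcIdx = I.getOperandNo();            // Which operand we feed.
      int SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
      SDValue CstOffset;
      if (SelIdx < 0 ||
          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
        continue;
      // Rewrite the user in place: the src operand becomes ALU_CONST and
      // the companion sel operand carries the constant-buffer offset.
      std::vector<SDValue> Ops;
      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
        if (i == SrcIdx)
          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
        else if (i == SelIdx)
          Ops.push_back(CstOffset);
        else
          Ops.push_back(I->getOperand(i));
      }
      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
    }
    break;
  }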

Modified:
    llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
    llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
    llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
    llvm/trunk/lib/Target/R600/R600InstrInfo.h
    llvm/trunk/test/CodeGen/R600/bfi_int.ll

Modified: llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp?rev=186919&r1=186918&r2=186919&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp Mon Jul 22 20:48:24 2013
@@ -50,7 +50,7 @@ public:
 private:
   inline SDValue getSmallIPtrImm(unsigned Imm);
   bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
-                   const R600InstrInfo *TII, std::vector<unsigned> Cst);
+                   const R600InstrInfo *TII);
   bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
   bool FoldDotOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
 
@@ -158,12 +158,100 @@ bool AMDGPUDAGToDAGISel::SelectADDR64(SD
 }
 
 SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
+  const R600InstrInfo *TII =
+                      static_cast<const R600InstrInfo*>(TM.getInstrInfo());
   unsigned int Opc = N->getOpcode();
   if (N->isMachineOpcode()) {
     return NULL;   // Already selected.
   }
   switch (Opc) {
   default: break;
+  case AMDGPUISD::CONST_ADDRESS: {
+    for (SDNode::use_iterator I = N->use_begin(), Next = llvm::next(I);
+                              I != SDNode::use_end(); I = Next) {
+      Next = llvm::next(I);
+      if (!I->isMachineOpcode()) {
+        continue;
+      }
+      unsigned Opcode = I->getMachineOpcode();
+      bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
+      int SrcIdx = I.getOperandNo();
+      int SelIdx;
+      // Unlike MachineInstrs, SDNodes do not have results in their operand
+      // list, so we need to increment the SrcIdx, since
+      // R600InstrInfo::getOperandIdx is based on the MachineInstr indices.
+      if (HasDst) {
+        SrcIdx++;
+      }
+
+      SelIdx = TII->getSelIdx(I->getMachineOpcode(), SrcIdx);
+      if (SelIdx < 0) {
+        continue;
+      }
+
+      SDValue CstOffset;
+      if (N->getValueType(0).isVector() ||
+          !SelectGlobalValueConstantOffset(N->getOperand(0), CstOffset))
+        continue;
+
+      // Gather constant values
+      int SrcIndices[] = {
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
+        TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
+      };
+      std::vector<unsigned> Consts;
+      for (unsigned i = 0; i < sizeof(SrcIndices) / sizeof(int); i++) {
+        int OtherSrcIdx = SrcIndices[i];
+        int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
+        if (OtherSrcIdx < 0 || OtherSelIdx < 0) {
+          continue;
+        }
+        if (HasDst) {
+          OtherSrcIdx--;
+          OtherSelIdx--;
+        }
+        if (RegisterSDNode *Reg =
+                         dyn_cast<RegisterSDNode>(I->getOperand(OtherSrcIdx))) {
+          if (Reg->getReg() == AMDGPU::ALU_CONST) {
+            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(I->getOperand(OtherSelIdx));
+            Consts.push_back(Cst->getZExtValue());
+          }
+        }
+      }
+
+      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+      Consts.push_back(Cst->getZExtValue());
+      if (!TII->fitsConstReadLimitations(Consts))
+        continue;
+
+      // Convert back to SDNode indices
+      if (HasDst) {
+        SrcIdx--;
+        SelIdx--;
+      }
+      std::vector<SDValue> Ops;
+      for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
+        if (i == SrcIdx) {
+          Ops.push_back(CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32));
+        } else if (i == SelIdx) {
+          Ops.push_back(CstOffset);
+        } else {
+          Ops.push_back(I->getOperand(i));
+        }
+      }
+      CurDAG->UpdateNodeOperands(*I, Ops.data(), Ops.size());
+    }
+    break;
+  }
   case ISD::BUILD_VECTOR: {
     const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>();
     if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
@@ -224,7 +312,6 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNod
     if (ST.getGeneration() > AMDGPUSubtarget::NORTHERN_ISLANDS) {
       break;
     }
-    const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo());
 
     uint64_t ImmValue = 0;
     unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
@@ -342,7 +429,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNod
     if (Result && Result->isMachineOpcode() &&
         !(TII->get(Result->getMachineOpcode()).TSFlags & R600_InstFlag::VECTOR)
         && TII->hasInstrModifiers(Result->getMachineOpcode())) {
-      // Fold FNEG/FABS/CONST_ADDRESS
+      // Fold FNEG/FABS
       // TODO: Isel can generate multiple MachineInst, we need to recursively
       // parse Result
       bool IsModified = false;
@@ -382,24 +469,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNod
 }
 
 bool AMDGPUDAGToDAGISel::FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg,
-                                     SDValue &Abs, const R600InstrInfo *TII,
-                                     std::vector<unsigned> Consts) {
+                                     SDValue &Abs, const R600InstrInfo *TII) {
   switch (Src.getOpcode()) {
-  case AMDGPUISD::CONST_ADDRESS: {
-    SDValue CstOffset;
-    if (Src.getValueType().isVector() ||
-        !SelectGlobalValueConstantOffset(Src.getOperand(0), CstOffset))
-      return false;
-
-    ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
-    Consts.push_back(Cst->getZExtValue());
-    if (!TII->fitsConstReadLimitations(Consts))
-      return false;
-
-    Src = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-    Sel = CstOffset;
-    return true;
-    }
   case ISD::FNEG:
     Src = Src.getOperand(0);
     Neg = CurDAG->getTargetConstant(1, MVT::i32);
@@ -441,19 +512,6 @@ bool AMDGPUDAGToDAGISel::FoldOperands(un
     -1
   };
 
-  // Gather constants values
-  std::vector<unsigned> Consts;
-  for (unsigned j = 0; j < 3; j++) {
-    int SrcIdx = OperandIdx[j];
-    if (SrcIdx < 0)
-      break;
-    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-      if (Reg->getReg() == AMDGPU::ALU_CONST) {
-        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-        Consts.push_back(Cst->getZExtValue());
-      }
-    }
-  }
 
   for (unsigned i = 0; i < 3; i++) {
     if (OperandIdx[i] < 0)
@@ -463,7 +521,7 @@ bool AMDGPUDAGToDAGISel::FoldOperands(un
     SDValue &Neg = Ops[NegIdx[i] - 1];
     SDValue FakeAbs;
     SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
-    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+    if (FoldOperand(Src, Sel, Neg, Abs, TII))
       return true;
   }
   return false;
@@ -512,20 +570,6 @@ bool AMDGPUDAGToDAGISel::FoldDotOperands
     TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
   };
 
-  // Gather constants values
-  std::vector<unsigned> Consts;
-  for (unsigned j = 0; j < 8; j++) {
-    int SrcIdx = OperandIdx[j];
-    if (SrcIdx < 0)
-      break;
-    if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
-      if (Reg->getReg() == AMDGPU::ALU_CONST) {
-        ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
-        Consts.push_back(Cst->getZExtValue());
-      }
-    }
-  }
-
   for (unsigned i = 0; i < 8; i++) {
     if (OperandIdx[i] < 0)
       return false;
@@ -533,7 +577,7 @@ bool AMDGPUDAGToDAGISel::FoldDotOperands
     SDValue &Sel = Ops[SelIdx[i] - 1];
     SDValue &Neg = Ops[NegIdx[i] - 1];
     SDValue &Abs = Ops[AbsIdx[i] - 1];
-    if (FoldOperand(Src, Sel, Neg, Abs, TII, Consts))
+    if (FoldOperand(Src, Sel, Neg, Abs, TII))
       return true;
   }
   return false;
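
The SrcIdx/SelIdx arithmetic in the hunk above (incrementing before the
getSelIdx() call, decrementing before rebuilding the operand list) exists
because MachineInstr operand lists start with the destination register while
SDNode operand lists do not.  The helpers below are hypothetical, added only
to restate that mapping; they are not part of the patch.

  // Hypothetical helpers (not in the patch): convert between SDNode operand
  // indices, which have no slot for the result, and MachineInstr operand
  // indices, where the dst register occupies operand 0.
  static int sdNodeToMIOperandIdx(int SDIdx, bool HasDst) {
    return HasDst ? SDIdx + 1 : SDIdx;   // Account for the dst slot.
  }
  static int miToSDNodeOperandIdx(int MIIdx, bool HasDst) {
    return HasDst ? MIIdx - 1 : MIIdx;   // Drop the dst slot again.
  }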

Modified: llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ISelLowering.cpp?rev=186919&r1=186918&r2=186919&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ISelLowering.cpp Mon Jul 22 20:48:24 2013
@@ -1154,6 +1154,30 @@ SDValue R600TargetLowering::LowerLOAD(SD
     return DAG.getMergeValues(MergedValues, 2, DL);
   }
 
+  // For most operations, returning SDValue() will result in the node being
+  // expanded by the DAG Legalizer.  This is not the case for ISD::LOAD, so
+  // we need to manually expand loads that may be legal in some address spaces
+  // and illegal in others.  SEXT loads from CONSTANT_BUFFER_0 are supported
+  // for compute shaders, since the data is sign extended when it is uploaded
+  // to the buffer.  However, SEXT loads from other address spaces are not
+  // supported, so we need to expand them here.
+  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
+    EVT MemVT = LoadNode->getMemoryVT();
+    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
+    SDValue ShiftAmount =
+          DAG.getConstant(VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
+    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
+                                  LoadNode->getPointerInfo(), MemVT,
+                                  LoadNode->isVolatile(),
+                                  LoadNode->isNonTemporal(),
+                                  LoadNode->getAlignment());
+    SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, NewLoad, ShiftAmount);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Shl, ShiftAmount);
+
+    SDValue MergedValues[2] = { Sra, Chain };
+    return DAG.getMergeValues(MergedValues, 2, DL);
+  }
+
   if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
     return SDValue();
   }
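
The new SEXT expansion above lowers a sign-extending load into a plain
extending load followed by a shift-left/arithmetic-shift-right pair.  The
standalone function below illustrates the same trick on a scalar; it is a
hypothetical example for clarity, not code from the patch.

  #include <cstdint>

  // Sign-extend the low MemBits bits of a loaded value into 32 bits by
  // shifting the payload up to the sign position and shifting it back down
  // arithmetically, mirroring the SHL/SRA pair emitted above.
  // MemBits is 8 or 16 in the lowering above.
  static int32_t signExtendLoaded(uint32_t Loaded, unsigned MemBits) {
    unsigned Shift = 32 - MemBits;   // VT bits minus MemVT bits.
    return static_cast<int32_t>(Loaded << Shift) >> Shift;
  }

  // For example, signExtendLoaded(0xFF, 8) yields -1 and
  // signExtendLoaded(0x7F, 8) yields 127.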

Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=186919&r1=186918&r2=186919&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Mon Jul 22 20:48:24 2013
@@ -186,6 +186,42 @@ bool R600InstrInfo::mustBeLastInClause(u
   }
 }
 
+int R600InstrInfo::getSrcIdx(unsigned Opcode, unsigned SrcNum) const {
+  static const unsigned OpTable[] = {
+    AMDGPU::OpName::src0,
+    AMDGPU::OpName::src1,
+    AMDGPU::OpName::src2
+  };
+
+  assert (SrcNum < 3);
+  return getOperandIdx(Opcode, OpTable[SrcNum]);
+}
+
+#define SRC_SEL_ROWS 11
+int R600InstrInfo::getSelIdx(unsigned Opcode, unsigned SrcIdx) const {
+  static const unsigned SrcSelTable[SRC_SEL_ROWS][2] = {
+    {AMDGPU::OpName::src0, AMDGPU::OpName::src0_sel},
+    {AMDGPU::OpName::src1, AMDGPU::OpName::src1_sel},
+    {AMDGPU::OpName::src2, AMDGPU::OpName::src2_sel},
+    {AMDGPU::OpName::src0_X, AMDGPU::OpName::src0_sel_X},
+    {AMDGPU::OpName::src0_Y, AMDGPU::OpName::src0_sel_Y},
+    {AMDGPU::OpName::src0_Z, AMDGPU::OpName::src0_sel_Z},
+    {AMDGPU::OpName::src0_W, AMDGPU::OpName::src0_sel_W},
+    {AMDGPU::OpName::src1_X, AMDGPU::OpName::src1_sel_X},
+    {AMDGPU::OpName::src1_Y, AMDGPU::OpName::src1_sel_Y},
+    {AMDGPU::OpName::src1_Z, AMDGPU::OpName::src1_sel_Z},
+    {AMDGPU::OpName::src1_W, AMDGPU::OpName::src1_sel_W}
+  };
+
+  for (unsigned i = 0; i < SRC_SEL_ROWS; ++i) {
+    if (getOperandIdx(Opcode, SrcSelTable[i][0]) == (int)SrcIdx) {
+      return getOperandIdx(Opcode, SrcSelTable[i][1]);
+    }
+  }
+  return -1;
+}
+#undef SRC_SEL_ROWS
+
 SmallVector<std::pair<MachineOperand *, int64_t>, 3>
 R600InstrInfo::getSrcs(MachineInstr *MI) const {
   SmallVector<std::pair<MachineOperand *, int64_t>, 3> Result;
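
getSelIdx() gives callers a way to go from a source operand to its companion
sel operand by name rather than by hard-coded offsets.  The fragment below is
a hypothetical usage sketch (the surrounding MI, TII, and Opcode variables
are assumed), not code taken from the patch:

  // Hypothetical usage: find the sel operand paired with src1 of an ALU
  // MachineInstr and read its constant-buffer selector, if present.
  int SrcIdx = TII->getSrcIdx(Opcode, 1);          // Operand index of src1.
  if (SrcIdx >= 0) {
    int SelIdx = TII->getSelIdx(Opcode, SrcIdx);   // Matching src1_sel.
    if (SelIdx >= 0) {
      int64_t Sel = MI->getOperand(SelIdx).getImm();
      (void)Sel;   // e.g. compare against a constant-read budget.
    }
  }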

Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=186919&r1=186918&r2=186919&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Mon Jul 22 20:48:24 2013
@@ -76,6 +76,13 @@ namespace llvm {
 
   bool mustBeLastInClause(unsigned Opcode) const;
 
+  /// \returns The operand index for the given source number.  Legal values
+  /// for SrcNum are 0, 1, and 2.
+  int getSrcIdx(unsigned Opcode, unsigned SrcNum) const;
+  /// \returns The operand index for the Sel operand given an index to one
+  /// of the instruction's src operands.
+  int getSelIdx(unsigned Opcode, unsigned SrcIdx) const;
+
   /// \returns a pair for each src of an ALU instructions.
   /// The first member of a pair is the register id.
   /// If register is ALU_CONST, second member is SEL.

Modified: llvm/trunk/test/CodeGen/R600/bfi_int.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/bfi_int.ll?rev=186919&r1=186918&r2=186919&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/bfi_int.ll (original)
+++ llvm/trunk/test/CodeGen/R600/bfi_int.ll Mon Jul 22 20:48:24 2013
@@ -36,7 +36,7 @@ entry:
 ; SHA-256 Ma function
 ; ((x & z) | (y & (x | z)))
 ; R600-CHECK: @bfi_sha256_ma
-; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]],
+; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
 ; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
 ; SI-CHECK: V_XOR_B32_e64 [[DST:VGPR[0-9]+]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
 ; SI-CHECK: V_BFI_B32 {{VGPR[0-9]+}}, [[DST]], {{[SV]GPR[0-9]+, [SV]GPR[0-9]+}}
