[llvm] r177078 - R600: Factorize code handling Const Read Port limitation

Vincent Lejeune vljn at ovi.com
Thu Mar 14 08:50:45 PDT 2013


Author: vljn
Date: Thu Mar 14 10:50:45 2013
New Revision: 177078

URL: http://llvm.org/viewvc/llvm-project?rev=177078&view=rev
Log:
R600: Factorize code handling Const Read Port limitation

Modified:
    llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
    llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
    llvm/trunk/lib/Target/R600/R600InstrInfo.h
    llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp
    llvm/trunk/lib/Target/R600/R600MachineScheduler.h
    llvm/trunk/test/CodeGen/R600/kcache-fold.ll

Modified: llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp?rev=177078&r1=177077&r2=177078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp Thu Mar 14 10:50:45 2013
@@ -365,17 +365,34 @@ bool AMDGPUDAGToDAGISel::FoldOperands(un
     SDValue Operand = Ops[OperandIdx[i] - 1];
     switch (Operand.getOpcode()) {
     case AMDGPUISD::CONST_ADDRESS: {
-      if (i == 2)
-        break;
       SDValue CstOffset;
-      if (!Operand.getValueType().isVector() &&
-          SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset)) {
-        Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
-        Ops[SelIdx[i] - 1] = CstOffset;
-        return true;
+      if (Operand.getValueType().isVector() ||
+          !SelectGlobalValueConstantOffset(Operand.getOperand(0), CstOffset))
+        break;
+
+      // Gather other constant values
+      std::vector<unsigned> Consts;
+      for (unsigned j = 0; j < 3; j++) {
+        int SrcIdx = OperandIdx[j];
+        if (SrcIdx < 0)
+          break;
+        if (RegisterSDNode *Reg = dyn_cast<RegisterSDNode>(Ops[SrcIdx - 1])) {
+          if (Reg->getReg() == AMDGPU::ALU_CONST) {
+            ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Ops[SelIdx[j] - 1]);
+            Consts.push_back(Cst->getZExtValue());
+          }
+        }
       }
+
+      ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(CstOffset);
+      Consts.push_back(Cst->getZExtValue());
+      if (!TII->fitsConstReadLimitations(Consts))
+        break;
+
+      Ops[OperandIdx[i] - 1] = CurDAG->getRegister(AMDGPU::ALU_CONST, MVT::f32);
+      Ops[SelIdx[i] - 1] = CstOffset;
+      return true;
       }
-      break;
     case ISD::FNEG:
       if (NegIdx[i] < 0)
         break;

Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.cpp?rev=177078&r1=177077&r2=177078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.cpp Thu Mar 14 10:50:45 2013
@@ -139,6 +139,60 @@ bool R600InstrInfo::isALUInstr(unsigned
           (TargetFlags & R600_InstFlag::OP3));
 }
 
+bool
+R600InstrInfo::fitsConstReadLimitations(const std::vector<unsigned> &Consts)
+    const {
+  assert (Consts.size() <= 12 && "Too many operands in instructions group");
+  unsigned Pair1 = 0, Pair2 = 0;
+  for (unsigned i = 0, n = Consts.size(); i < n; ++i) {
+    unsigned ReadConstHalf = Consts[i] & 2;
+    unsigned ReadConstIndex = Consts[i] & (~3);
+    unsigned ReadHalfConst = ReadConstIndex | ReadConstHalf;
+    if (!Pair1) {
+      Pair1 = ReadHalfConst;
+      continue;
+    }
+    if (Pair1 == ReadHalfConst)
+      continue;
+    if (!Pair2) {
+      Pair2 = ReadHalfConst;
+      continue;
+    }
+    if (Pair2 != ReadHalfConst)
+      return false;
+  }
+  return true;
+}
+
+bool
+R600InstrInfo::canBundle(const std::vector<MachineInstr *> &MIs) const {
+  std::vector<unsigned> Consts;
+  for (unsigned i = 0, n = MIs.size(); i < n; i++) {
+    const MachineInstr *MI = MIs[i];
+
+    const R600Operands::Ops OpTable[3][2] = {
+      {R600Operands::SRC0, R600Operands::SRC0_SEL},
+      {R600Operands::SRC1, R600Operands::SRC1_SEL},
+      {R600Operands::SRC2, R600Operands::SRC2_SEL},
+    };
+
+    if (!isALUInstr(MI->getOpcode()))
+      continue;
+
+    for (unsigned j = 0; j < 3; j++) {
+      int SrcIdx = getOperandIdx(MI->getOpcode(), OpTable[j][0]);
+      if (SrcIdx < 0)
+        break;
+      if (MI->getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST) {
+        unsigned Const = MI->getOperand(
+            getOperandIdx(MI->getOpcode(), OpTable[j][1])).getImm();
+        Consts.push_back(Const);
+      }
+    }
+  }
+  return fitsConstReadLimitations(Consts);
+}
+
 DFAPacketizer *R600InstrInfo::CreateTargetScheduleState(const TargetMachine *TM,
     const ScheduleDAG *DAG) const {
   const InstrItineraryData *II = TM->getInstrItineraryData();

Modified: llvm/trunk/lib/Target/R600/R600InstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600InstrInfo.h?rev=177078&r1=177077&r2=177078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600InstrInfo.h (original)
+++ llvm/trunk/lib/Target/R600/R600InstrInfo.h Thu Mar 14 10:50:45 2013
@@ -53,6 +53,9 @@ namespace llvm {
   /// \returns true if this \p Opcode represents an ALU instruction.
   bool isALUInstr(unsigned Opcode) const;
 
+  bool fitsConstReadLimitations(const std::vector<unsigned>&) const;
+  bool canBundle(const std::vector<MachineInstr *> &) const;
+
   /// \breif Vector instructions are instructions that must fill all
   /// instruction slots within an instruction group.
   bool isVector(const MachineInstr &MI) const;

Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp?rev=177078&r1=177077&r2=177078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600MachineScheduler.cpp Thu Mar 14 10:50:45 2013
@@ -37,7 +37,6 @@ void R600SchedStrategy::initialize(Sched
   CurInstKind = IDOther;
   CurEmitted = 0;
   OccupedSlotsMask = 15;
-  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
   InstKindLimit[IDAlu] = 120; // 120 minus 8 for security
 
 
@@ -288,79 +287,19 @@ int R600SchedStrategy::getInstKind(SUnit
   }
 }
 
-class ConstPairs {
-private:
-  unsigned XYPair;
-  unsigned ZWPair;
-public:
-  ConstPairs(unsigned ReadConst[3]) : XYPair(0), ZWPair(0) {
-    for (unsigned i = 0; i < 3; i++) {
-      unsigned ReadConstChan = ReadConst[i] & 3;
-      unsigned ReadConstIndex = ReadConst[i] & (~3);
-      if (ReadConstChan < 2) {
-        if (!XYPair) {
-          XYPair = ReadConstIndex;
-        }
-      } else {
-        if (!ZWPair) {
-          ZWPair = ReadConstIndex;
-        }
-      }
-    }
-  }
-
-  bool isCompatibleWith(const ConstPairs& CP) const {
-    return (!XYPair || !CP.XYPair || CP.XYPair == XYPair) &&
-        (!ZWPair || !CP.ZWPair || CP.ZWPair == ZWPair);
-  }
-};
-
-static
-const ConstPairs getPairs(const R600InstrInfo *TII, const MachineInstr& MI) {
-  unsigned ReadConsts[3] = {0, 0, 0};
-  R600Operands::Ops OpTable[3][2] = {
-    {R600Operands::SRC0, R600Operands::SRC0_SEL},
-    {R600Operands::SRC1, R600Operands::SRC1_SEL},
-    {R600Operands::SRC2, R600Operands::SRC2_SEL},
-  };
-
-  if (!TII->isALUInstr(MI.getOpcode()))
-    return ConstPairs(ReadConsts);
-
-  for (unsigned i = 0; i < 3; i++) {
-    int SrcIdx = TII->getOperandIdx(MI.getOpcode(), OpTable[i][0]);
-    if (SrcIdx < 0)
-      break;
-    if (MI.getOperand(SrcIdx).getReg() == AMDGPU::ALU_CONST)
-      ReadConsts[i] =MI.getOperand(
-          TII->getOperandIdx(MI.getOpcode(), OpTable[i][1])).getImm();
-  }
-  return ConstPairs(ReadConsts);
-}
-
-bool
-R600SchedStrategy::isBundleable(const MachineInstr& MI) {
-  const ConstPairs &MIPair = getPairs(TII, MI);
-  for (unsigned i = 0; i < 4; i++) {
-    if (!InstructionsGroupCandidate[i])
-      continue;
-    const ConstPairs &IGPair = getPairs(TII,
-        *InstructionsGroupCandidate[i]->getInstr());
-    if (!IGPair.isCompatibleWith(MIPair))
-      return false;
-  }
-  return true;
-}
-
 SUnit *R600SchedStrategy::PopInst(std::multiset<SUnit *, CompareSUnit> &Q) {
   if (Q.empty())
     return NULL;
   for (std::set<SUnit *, CompareSUnit>::iterator It = Q.begin(), E = Q.end();
       It != E; ++It) {
     SUnit *SU = *It;
-    if (isBundleable(*SU->getInstr())) {
+    InstructionsGroupCandidate.push_back(SU->getInstr());
+    if (TII->canBundle(InstructionsGroupCandidate)) {
+      InstructionsGroupCandidate.pop_back();
       Q.erase(It);
       return SU;
+    } else {
+      InstructionsGroupCandidate.pop_back();
     }
   }
   return NULL;
@@ -381,7 +320,7 @@ void R600SchedStrategy::PrepareNextSlot(
   DEBUG(dbgs() << "New Slot\n");
   assert (OccupedSlotsMask && "Slot wasn't filled");
   OccupedSlotsMask = 0;
-  memset(InstructionsGroupCandidate, 0, sizeof(InstructionsGroupCandidate));
+  InstructionsGroupCandidate.clear();
   LoadAlu();
 }
 
@@ -462,7 +401,7 @@ SUnit* R600SchedStrategy::pickAlu() {
         SUnit *SU = AttemptFillSlot(Chan);
         if (SU) {
           OccupedSlotsMask |= (1 << Chan);
-          InstructionsGroupCandidate[Chan] = SU;
+          InstructionsGroupCandidate.push_back(SU->getInstr());
           return SU;
         }
       }

Modified: llvm/trunk/lib/Target/R600/R600MachineScheduler.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600MachineScheduler.h?rev=177078&r1=177077&r2=177078&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600MachineScheduler.h (original)
+++ llvm/trunk/lib/Target/R600/R600MachineScheduler.h Thu Mar 14 10:50:45 2013
@@ -98,7 +98,7 @@ public:
   virtual void releaseBottomNode(SUnit *SU);
 
 private:
-  SUnit *InstructionsGroupCandidate[4];
+  std::vector<MachineInstr *> InstructionsGroupCandidate;
 
   int getInstKind(SUnit *SU);
   bool regBelongsToClass(unsigned Reg, const TargetRegisterClass *RC) const;
@@ -112,7 +112,6 @@ private:
   void AssignSlot(MachineInstr *MI, unsigned Slot);
   SUnit* pickAlu();
   SUnit* pickOther(int QID);
-  bool isBundleable(const MachineInstr& MI);
   void MoveUnits(ReadyQueue *QSrc, ReadyQueue *QDst);
 };
 

Modified: llvm/trunk/test/CodeGen/R600/kcache-fold.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/kcache-fold.ll?rev=177078&r1=177077&r2=177078&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/kcache-fold.ll (original)
+++ llvm/trunk/test/CodeGen/R600/kcache-fold.ll Thu Mar 14 10:50:45 2013
@@ -1,8 +1,8 @@
 ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
 
+; CHECK: @main1
 ; CHECK: MOV T{{[0-9]+\.[XYZW], CBuf0\[[0-9]+\]\.[XYZW]}}
-
-define void @main() {
+define void @main1() {
 main_body:
   %0 = load <4 x float> addrspace(8)* null
   %1 = extractelement <4 x float> %0, i32 0
@@ -37,6 +37,54 @@ main_body:
   %30 = fcmp ult float %25, 0.000000e+00
   %31 = select i1 %30, float %27, float %29
   %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
+  %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
+  %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
+  %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)
+  %36 = insertelement <4 x float> undef, float %32, i32 0
+  %37 = insertelement <4 x float> %36, float %33, i32 1
+  %38 = insertelement <4 x float> %37, float %34, i32 2
+  %39 = insertelement <4 x float> %38, float %35, i32 3
+  call void @llvm.R600.store.swizzle(<4 x float> %39, i32 0, i32 0)
+  ret void
+}
+
+; CHECK: @main2
+; CHECK-NOT: MOV
+define void @main2() {
+main_body:
+  %0 = load <4 x float> addrspace(8)* null
+  %1 = extractelement <4 x float> %0, i32 0
+  %2 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %3 = extractelement <4 x float> %2, i32 0
+  %4 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %5 = extractelement <4 x float> %4, i32 1
+  %6 = fcmp ult float %1, 0.000000e+00
+  %7 = select i1 %6, float %3, float %5
+  %8 = load <4 x float> addrspace(8)* null
+  %9 = extractelement <4 x float> %8, i32 1
+  %10 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %11 = extractelement <4 x float> %10, i32 0
+  %12 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %13 = extractelement <4 x float> %12, i32 1
+  %14 = fcmp ult float %9, 0.000000e+00
+  %15 = select i1 %14, float %11, float %13
+  %16 = load <4 x float> addrspace(8)* null
+  %17 = extractelement <4 x float> %16, i32 2
+  %18 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %19 = extractelement <4 x float> %18, i32 3
+  %20 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 1)
+  %21 = extractelement <4 x float> %20, i32 2
+  %22 = fcmp ult float %17, 0.000000e+00
+  %23 = select i1 %22, float %19, float %21
+  %24 = load <4 x float> addrspace(8)* null
+  %25 = extractelement <4 x float> %24, i32 3
+  %26 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %27 = extractelement <4 x float> %26, i32 3
+  %28 = load <4 x float> addrspace(8)* getelementptr ([1024 x <4 x float>] addrspace(8)* null, i64 0, i32 2)
+  %29 = extractelement <4 x float> %28, i32 2
+  %30 = fcmp ult float %25, 0.000000e+00
+  %31 = select i1 %30, float %27, float %29
+  %32 = call float @llvm.AMDIL.clamp.(float %7, float 0.000000e+00, float 1.000000e+00)
   %33 = call float @llvm.AMDIL.clamp.(float %15, float 0.000000e+00, float 1.000000e+00)
   %34 = call float @llvm.AMDIL.clamp.(float %23, float 0.000000e+00, float 1.000000e+00)
   %35 = call float @llvm.AMDIL.clamp.(float %31, float 0.000000e+00, float 1.000000e+00)





More information about the llvm-commits mailing list