[llvm-branch-commits] [llvm-branch] r164538 - in /llvm/branches/R600/lib/Target/AMDGPU: AMDGPUISelLowering.cpp AMDGPUISelLowering.h R600ExpandSpecialInstrs.cpp R600ISelLowering.cpp R600ISelLowering.h R600Instructions.td R600Intrinsics.td R600MachineFunctionInfo.cpp R600MachineFunctionInfo.h

Tom Stellard thomas.stellard at amd.com
Mon Sep 24 08:54:34 PDT 2012


Author: tstellar
Date: Mon Sep 24 10:52:54 2012
New Revision: 164538

URL: http://llvm.org/viewvc/llvm-project?rev=164538&view=rev
Log:
R600: support for interpolation intrinsics

Patch by Vincent Lejeune.

Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

Modified:
    llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.h
    llvm/branches/R600/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
    llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp
    llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.h
    llvm/branches/R600/lib/Target/AMDGPU/R600Instructions.td
    llvm/branches/R600/lib/Target/AMDGPU/R600Intrinsics.td
    llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
    llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.h

Modified: llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Mon Sep 24 10:52:54 2012
@@ -346,5 +346,7 @@
   NODE_NAME_CASE(SMIN)
   NODE_NAME_CASE(UMIN)
   NODE_NAME_CASE(URECIP)
+  NODE_NAME_CASE(INTERP)
+  NODE_NAME_CASE(INTERP_P0)
   }
 }

Modified: llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/AMDGPUISelLowering.h Mon Sep 24 10:52:54 2012
@@ -119,6 +119,8 @@
   SMIN,
   UMIN,
   URECIP,
+  INTERP,
+  INTERP_P0,
   LAST_AMDGPU_ISD_NUMBER
 };
 

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp Mon Sep 24 10:52:54 2012
@@ -15,6 +15,7 @@
 #include "R600Defines.h"
 #include "R600InstrInfo.h"
 #include "R600RegisterInfo.h"
+#include "R600MachineFunctionInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -29,6 +30,9 @@
   static char ID;
   const R600InstrInfo *TII;
 
+  bool ExpandInputPerspective(MachineInstr& MI);
+  bool ExpandInputConstant(MachineInstr& MI);
+
 public:
   R600ExpandSpecialInstrsPass(TargetMachine &tm) : MachineFunctionPass(ID),
     TII (static_cast<const R600InstrInfo *>(tm.getInstrInfo())) { }
@@ -48,6 +52,126 @@
   return new R600ExpandSpecialInstrsPass(TM);
 }
 
+/// Expand an AMDGPU::input_perspective pseudo into eight INTERP_ZW / INTERP_XY
+/// instructions inserted in front of it, then erase the pseudo.
+/// Returns true when MI was expanded and erased, false when it was left alone.
+bool R600ExpandSpecialInstrsPass::ExpandInputPerspective(MachineInstr &MI)
+{
+  if (MI.getOpcode() != AMDGPU::input_perspective)
+    return false;
+
+  const R600RegisterInfo &TRI = TII->getRegisterInfo();
+  MachineBasicBlock &MBB = *MI.getParent();
+  MachineBasicBlock::iterator I = &MI;
+  unsigned DstReg = MI.getOperand(0).getReg();
+  R600MachineFunctionInfo *MFI =
+      MBB.getParent()->getInfo<R600MachineFunctionInfo>();
+
+  // Operand 1 picks the ij pair: 0 is perspective, 1 is linear.
+  unsigned IJIndexBase;
+  switch (MI.getOperand(1).getImm()) {
+  case 0:
+    IJIndexBase = MFI->GetIJPerspectiveIndex();
+    break;
+  case 1:
+    IJIndexBase = MFI->GetIJLinearIndex();
+    break;
+  default:
+    assert(0 && "Unknown ij index");
+    // Without asserts, bail out instead of using an indeterminate index.
+    return false;
+  }
+
+  // In Evergreen ISA doc section 8.3.2 :
+  // We need to interpolate XY and ZW in two different instruction groups.
+  // An INTERP_* must occupy all 4 slots of an instruction group.
+  // Output of INTERP_XY is written in X,Y slots
+  // Output of INTERP_ZW is written in Z,W slots
+  //
+  // Thus interpolation requires the following sequences :
+  //
+  // AnyGPR.x = INTERP_ZW; (Write Masked Out)
+  // AnyGPR.y = INTERP_ZW; (Write Masked Out)
+  // DstGPR.z = INTERP_ZW;
+  // DstGPR.w = INTERP_ZW; (End of first IG)
+  // DstGPR.x = INTERP_XY;
+  // DstGPR.y = INTERP_XY;
+  // AnyGPR.z = INTERP_XY; (Write Masked Out)
+  // AnyGPR.w = INTERP_XY; (Write Masked Out) (End of second IG)
+  const unsigned Sels[4] = {
+    AMDGPU::sel_x, AMDGPU::sel_y, AMDGPU::sel_z, AMDGPU::sel_w
+  };
+  // Loop-invariant: every slot reads the same register, picked by operand 2.
+  unsigned ReadReg = AMDGPU::R600_TReg32RegClass.getRegister(
+      4 * MI.getOperand(2).getImm());
+
+  for (unsigned i = 0; i < 8; i++) {
+    // The ij source is 2 * IJIndexBase + 1 for even i and + 0 for odd i.
+    unsigned IJIndex = AMDGPU::R600_TReg32RegClass.getRegister(
+        2 * IJIndexBase + ((i + 1) % 2));
+    unsigned Res = TRI.getSubReg(DstReg, Sels[i % 4]);
+
+    // Slots 0-3 are the INTERP_ZW group, slots 4-7 the INTERP_XY group.
+    const MCInstrDesc &Opcode =
+        TII->get(i < 4 ? AMDGPU::INTERP_ZW : AMDGPU::INTERP_XY);
+
+    MachineInstr *NewMI = BuildMI(MBB, I, MBB.findDebugLoc(I), Opcode, Res)
+        .addReg(IJIndex)
+        .addReg(ReadReg)
+        .addImm(0);
+
+    // Only slots 2-5 keep their result; all others get MO_FLAG_MASK.
+    if (i < 2 || i > 5)
+      TII->addFlag(NewMI, 0, MO_FLAG_MASK);
+
+    // The fourth slot of each group is the only one not flagged NOT_LAST.
+    if (i % 4 != 3)
+      TII->addFlag(NewMI, 0, MO_FLAG_NOT_LAST);
+  }
+
+  MI.eraseFromParent();
+  return true;
+}
+
+// Replace an AMDGPU::input_constant pseudo by four INTERP_LOAD_P0 instructions
+// (one per sel_x..sel_w sub-register of its destination) and erase it.
+// Returns false, touching nothing, when MI is any other opcode.
+bool R600ExpandSpecialInstrsPass::ExpandInputConstant(MachineInstr &MI)
+{
+  const R600RegisterInfo &TRI = TII->getRegisterInfo();
+  if (MI.getOpcode() != AMDGPU::input_constant)
+    return false;
+
+  MachineBasicBlock &Block = *MI.getParent();
+  MachineBasicBlock::iterator InsertPt = &MI;
+  const unsigned Dst = MI.getOperand(0).getReg();
+  const unsigned Channels[4] = {
+    AMDGPU::sel_x, AMDGPU::sel_y, AMDGPU::sel_z, AMDGPU::sel_w
+  };
+
+  for (unsigned Chan = 0; Chan != 4; ++Chan) {
+    // Operand 1 is scaled by 4 so that each channel gets its own register.
+    unsigned SrcReg = AMDGPU::R600_TReg32RegClass.getRegister(
+        4 * MI.getOperand(1).getImm() + Chan);
+    unsigned SubDst = TRI.getSubReg(Dst, Channels[Chan]);
+
+    MachineInstr *Load = BuildMI(Block, InsertPt,
+        Block.findDebugLoc(InsertPt),
+        TII->get(AMDGPU::INTERP_LOAD_P0), SubDst)
+        .addReg(SrcReg)
+        .addImm(0);
+
+    // All but the final channel are flagged MO_FLAG_NOT_LAST.
+    if (Chan != 3)
+      TII->addFlag(Load, 0, MO_FLAG_NOT_LAST);
+  }
+
+  // The pseudo has been fully replaced by the instructions built above.
+  MI.eraseFromParent();
+
+  return true;
+}
+
 bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
 
   const R600RegisterInfo &TRI = TII->getRegisterInfo();
@@ -60,6 +184,11 @@
       MachineInstr &MI = *I;
       I = llvm::next(I);
 
+	if (ExpandInputPerspective(MI))
+	  continue;
+	if (ExpandInputConstant(MI))
+	  continue;
+
       bool IsReduction = TII->isReductionOp(MI.getOpcode());
       bool IsVector = TII->isVector(MI);
 	    bool IsCube = TII->isCubeOp(MI.getOpcode());

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.cpp Mon Sep 24 10:52:54 2012
@@ -44,6 +44,7 @@
 
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
 
   setOperationAction(ISD::ROTL, MVT::i32, Custom);
 
@@ -240,6 +241,29 @@
               .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
       break;
     }
+  case AMDGPU::input_perspective:
+    {
+      R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
+
+      // XXX Be more fine about register reservation
+      for (unsigned i = 0; i < 4; i ++) {
+        unsigned ReservedReg = AMDGPU::R600_TReg32RegClass.getRegister(i);
+        MFI->ReservedRegs.push_back(ReservedReg);
+      }
+
+      switch (MI->getOperand(1).getImm()) {
+      case 0:// Perspective
+        MFI->HasPerspectiveInterpolation = true;
+        break;
+      case 1:// Linear
+        MFI->HasLinearInterpolation = true;
+        break;
+      default:
+        assert(0 && "Unknow ij index");
+      }
+
+      return BB;
+    }
   }
 
   MI->eraseFromParent();
@@ -294,6 +318,47 @@
       unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
       return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, Reg, VT);
     }
+    case AMDGPUIntrinsic::R600_load_input_perspective: {
+      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+      SDValue FullVector = DAG.getNode(
+          AMDGPUISD::INTERP,
+          DL, MVT::v4f32,
+          DAG.getConstant(0, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+    }
+    case AMDGPUIntrinsic::R600_load_input_linear: {
+      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+      SDValue FullVector = DAG.getNode(
+        AMDGPUISD::INTERP,
+        DL, MVT::v4f32,
+        DAG.getConstant(1, MVT::i32), DAG.getConstant(slot / 4 , MVT::i32));
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+        DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+    }
+    case AMDGPUIntrinsic::R600_load_input_constant: {
+      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+      SDValue FullVector = DAG.getNode(
+        AMDGPUISD::INTERP_P0,
+        DL, MVT::v4f32,
+        DAG.getConstant(slot / 4 , MVT::i32));
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
+          DL, VT, FullVector, DAG.getConstant(slot % 4, MVT::i32));
+    }
+    case AMDGPUIntrinsic::R600_load_input_position: {
+      unsigned slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+      unsigned RegIndex = AMDGPU::R600_TReg32RegClass.getRegister(slot);
+      SDValue Reg = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+	    RegIndex, MVT::f32);
+      if ((slot % 4) == 3) {
+        return DAG.getNode(ISD::FDIV,
+            DL, VT,
+            DAG.getConstantFP(1.0f, MVT::f32),
+            Reg);
+      } else {
+        return Reg;
+      }
+    }
 
     case r600_read_ngroups_x:
       return LowerImplicitParameter(DAG, VT, DL, 0);
@@ -347,9 +412,30 @@
   switch (N->getOpcode()) {
   default: return;
+  case ISD::INTRINSIC_WO_CHAIN:
+    {
+      // Handled ahead of FP_TO_UINT: that case has no terminating return, so
+      // placed after it FP_TO_UINT nodes fell through into the cast below.
+      unsigned IntrinsicID =
+          cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
+      if (IntrinsicID == AMDGPUIntrinsic::R600_load_input_face)
+        Results.push_back(LowerInputFace(N, DAG));
+      return;
+    }
   case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
   }
 }
 
+SDValue R600TargetLowering::LowerInputFace(SDNode* Op, SelectionDAG &DAG) const
+{
+  // Yields an i1 SETCC (SETUGT) of the slot's f32 input register against 0.0.
+  unsigned InputSlot = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
+  unsigned FaceReg = AMDGPU::R600_TReg32RegClass.getRegister(InputSlot);
+  SDValue Face = CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
+      FaceReg, MVT::f32);
+  return DAG.getNode(ISD::SETCC, Op->getDebugLoc(), MVT::i1, Face,
+      DAG.getConstantFP(0.0f, MVT::f32), DAG.getCondCode(ISD::SETUGT));
+}
+
 SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const
 {
   return DAG.getNode(

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.h?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.h (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600ISelLowering.h Mon Sep 24 10:52:54 2012
@@ -58,6 +58,7 @@
 
   SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerInputFace(SDNode *Op, SelectionDAG &DAG) const;
   SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
 };
 

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600Instructions.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600Instructions.td?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600Instructions.td (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600Instructions.td Mon Sep 24 10:52:54 2012
@@ -264,6 +264,60 @@
 def isR600toCayman : Predicate<
                      "Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX">;
 
+//===----------------------------------------------------------------------===//
+// Interpolation Instructions
+//===----------------------------------------------------------------------===//
+
+def INTERP: SDNode<"AMDGPUISD::INTERP",  // 1 FP result, 2 integer operands
+  SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisInt<1>, SDTCisInt<2>]>
+  >;
+
+def INTERP_P0: SDNode<"AMDGPUISD::INTERP_P0",  // 1 FP result, 1 integer operand
+  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisInt<1>]>
+  >;
+
+let usesCustomInserter = 1 in {
+def input_perspective :  AMDGPUShaderInst <  // pseudo selected for INTERP
+  (outs R600_Reg128:$dst),
+  (ins i32imm:$src0, i32imm:$src1),  // $src0: 0 perspective, 1 linear; $src1: slot / 4
+  "input_perspective $src0 $src1 : dst",
+  [(set R600_Reg128:$dst, (INTERP (i32 imm:$src0), (i32 imm:$src1)))]>;
+}  // End usesCustomInserter = 1
+
+def input_constant :  AMDGPUShaderInst <  // pseudo selected for INTERP_P0
+  (outs R600_Reg128:$dst),
+  (ins i32imm:$src),                       // $src: input slot / 4
+  "input_constant $src : dst",             // mnemonic matches the def name
+  [(set R600_Reg128:$dst, (INTERP_P0 (i32 imm:$src)))]>;
+
+
+
+def INTERP_XY : InstR600 <0xD6,
+  (outs R600_Reg32:$dst),
+  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
+  "INTERP_XY dst",
+  [], AnyALU>  // no selection pattern; built when input_perspective is expanded
+{
+  let FlagOperandIdx = 3;  // index of $flags when $dst is operand 0
+}
+
+def INTERP_ZW : InstR600 <0xD7,
+  (outs R600_Reg32:$dst),
+  (ins R600_Reg32:$src0, R600_Reg32:$src1, i32imm:$flags),
+  "INTERP_ZW dst",
+  [], AnyALU>  // no selection pattern; built when input_perspective is expanded
+{
+  let FlagOperandIdx = 3;  // index of $flags when $dst is operand 0
+}
+
+def INTERP_LOAD_P0 : InstR600 <0xE0,
+  (outs R600_Reg32:$dst),
+  (ins R600_Reg32:$src, i32imm:$flags),
+  "INTERP_LOAD_P0 dst",
+  [], AnyALU>  // no selection pattern; built when input_constant is expanded
+{
+  let FlagOperandIdx = 2;  // index of $flags when $dst is operand 0
+}
 
 let Predicates = [isR600toCayman] in { 
 

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600Intrinsics.td?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600Intrinsics.td (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600Intrinsics.td Mon Sep 24 10:52:54 2012
@@ -13,4 +13,14 @@
 
 let TargetPrefix = "R600", isTarget = 1 in {
   def int_R600_load_input : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
+  def int_R600_load_input_perspective :  // lowered to INTERP with selector 0
+    Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+  def int_R600_load_input_constant :     // lowered to INTERP_P0
+    Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+  def int_R600_load_input_linear :       // lowered to INTERP with selector 1
+    Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+  def int_R600_load_input_position :     // register read; 1.0 / x if slot % 4 == 3
+    Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrReadMem]>;
+  def int_R600_load_input_face :         // i1 result; lowered by LowerInputFace
+    Intrinsic<[llvm_i1_ty], [llvm_i32_ty], [IntrReadMem]>;
 }

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp Mon Sep 24 10:52:54 2012
@@ -12,5 +12,22 @@
 using namespace llvm;
 
 R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
-  : MachineFunctionInfo()
+  : MachineFunctionInfo(),
+    HasLinearInterpolation(false),
+    HasPerspectiveInterpolation(false)
   { }
+
+unsigned R600MachineFunctionInfo::GetIJPerspectiveIndex() const
+{
+  assert(HasPerspectiveInterpolation);  // only meaningful when in use
+  return 0u;                            // perspective ij always comes first
+}
+
+unsigned R600MachineFunctionInfo::GetIJLinearIndex() const
+{
+  // Linear ij sits behind perspective ij when both are in use (index 1);
+  // otherwise it takes index 0.
+  assert(HasLinearInterpolation);
+  const unsigned LinearIndex = HasPerspectiveInterpolation ? 1 : 0;
+  return LinearIndex;
+}

Modified: llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.h?rev=164538&r1=164537&r2=164538&view=diff
==============================================================================
--- llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.h (original)
+++ llvm/branches/R600/lib/Target/AMDGPU/R600MachineFunctionInfo.h Mon Sep 24 10:52:54 2012
@@ -25,6 +25,11 @@
 public:
   R600MachineFunctionInfo(const MachineFunction &MF);
   std::vector<unsigned> ReservedRegs;
+  bool HasLinearInterpolation;
+  bool HasPerspectiveInterpolation;
+
+  unsigned GetIJLinearIndex() const;
+  unsigned GetIJPerspectiveIndex() const;
 
 };
 





More information about the llvm-branch-commits mailing list