Input handling rework for SI

Tom Stellard tom at stellard.net
Tue Mar 5 11:01:01 PST 2013


On Tue, Mar 05, 2013 at 07:53:52PM +0100, Tom Stellard wrote:
> On Tue, Mar 05, 2013 at 03:23:33PM +0100, Christian König wrote:
> > Hi guys,
> > 
> > while working on indirect addressing for radeonsi (and YES I have
> > indirect addressing of consts working and now on the way of getting
> > source-indirect addressing of temps/inputs working) I stumbled over
> > a whole bunch of problems with the way we currently handle vertex
> > and pixel shader input registers. Especially the handling of M0 was
> > sucking badly, and was easily broken by my new code.
> > 
> > So please take a look at the attached patches. I have to confess
> > that it has gotten much further than I expected, but I think the way
> > those patches now handle function parameters is much, much
> > cleaner than what we did before.
> > 
> > The matching mesa patches should hit the mailing list in a few minutes.
> >
> 
> Hi Christian,
> 
> This series has my r-b, I would like to test on R600 before you push it.
> 

Don't forget to update CMakeLists.txt after removing the SIAssignInterpRegs.cpp
file.

-Tom

>  
> > Cheers,
> > Christian.
> 
> > From 8f252afc3c18ce86bd7b0feebfc514957cffcc8d Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Sat, 2 Mar 2013 18:48:52 +0100
> > Subject: [PATCH 1/7] R600/SI: fix unused variable warning
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/AMDGPUIndirectAddressing.cpp |    1 -
> >  1 file changed, 1 deletion(-)
> > 
> > diff --git a/lib/Target/R600/AMDGPUIndirectAddressing.cpp b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> > index 15840b3..ed6c8ec 100644
> > --- a/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> > +++ b/lib/Target/R600/AMDGPUIndirectAddressing.cpp
> > @@ -289,7 +289,6 @@ bool AMDGPUIndirectAddressingPass::runOnMachineFunction(MachineFunction &MF) {
> >  
> >            // We only need to use REG_SEQUENCE for explicit defs, since the
> >            // register coalescer won't do anything with the implicit defs.
> > -          MachineInstr *DefInstr = MRI.getVRegDef(Reg);
> >            if (!regHasExplicitDef(MRI, Reg)) {
> >              continue;
> >            }
> > -- 
> > 1.7.10.4
> > 
> 
> > From 27c534082d61ce2f330034f5a3b5787787e63988 Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Fri, 1 Mar 2013 10:50:55 +0100
> > Subject: [PATCH 2/7] R600/SI: switch types of SGPRs to v*i8
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/SIISelLowering.cpp |   21 +++++++++++++++++----
> >  lib/Target/R600/SIInstructions.td  |    4 ++--
> >  lib/Target/R600/SIIntrinsics.td    |    4 ++--
> >  lib/Target/R600/SIRegisterInfo.td  |    6 +++---
> >  4 files changed, 24 insertions(+), 11 deletions(-)
> > 
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 0a0fbd9..7aa1037 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -28,17 +28,30 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> >      AMDGPUTargetLowering(TM),
> >      TII(static_cast<const SIInstrInfo*>(TM.getInstrInfo())),
> >      TRI(TM.getRegisterInfo()) {
> > -  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
> > -  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
> > -  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
> > -  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
> > +
> >    addRegisterClass(MVT::i1, &AMDGPU::SReg_64RegClass);
> > +  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
> > +
> > +  addRegisterClass(MVT::v16i8, &AMDGPU::SReg_128RegClass);
> > +  addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
> > +  addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
> > +
> > +  addRegisterClass(MVT::i32, &AMDGPU::VReg_32RegClass);
> > +  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
> >  
> >    addRegisterClass(MVT::v1i32, &AMDGPU::VReg_32RegClass);
> > +
> >    addRegisterClass(MVT::v2i32, &AMDGPU::VReg_64RegClass);
> > +  addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
> > +
> >    addRegisterClass(MVT::v4i32, &AMDGPU::VReg_128RegClass);
> > +  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
> > +
> >    addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
> > +  addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
> > +
> >    addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
> > +  addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
> >  
> >    computeRegisterProperties();
> >  
> > diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> > index d9d7b4c..bc6d604 100644
> > --- a/lib/Target/R600/SIInstructions.td
> > +++ b/lib/Target/R600/SIInstructions.td
> > @@ -1479,7 +1479,7 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> {
> >  
> >  defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>;
> >  defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>;
> > -defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>;
> > -defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v8i32>;
> > +defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v16i8>;
> > +defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>;
> >  
> >  } // End isSI predicate
> > diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> > index 611b9c4..04308d8 100644
> > --- a/lib/Target/R600/SIIntrinsics.td
> > +++ b/lib/Target/R600/SIIntrinsics.td
> > @@ -19,10 +19,10 @@ let TargetPrefix = "SI", isTarget = 1 in {
> >    /* XXX: We may need a seperate intrinsic here for loading integer values */
> >    def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
> >    def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
> > -  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v4i32_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
> > +  def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
> >    def int_SI_wqm : Intrinsic <[], [], []>;
> >  
> > -  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty], [IntrReadMem]>;
> > +  class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_i32_ty, llvm_anyvector_ty, llvm_v32i8_ty, llvm_v16i8_ty, llvm_i32_ty], [IntrReadMem]>;
> >  
> >    def int_SI_sample : Sample;
> >    def int_SI_sampleb : Sample;
> > diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> > index 9e04e24..c4bca2c 100644
> > --- a/lib/Target/R600/SIRegisterInfo.td
> > +++ b/lib/Target/R600/SIRegisterInfo.td
> > @@ -177,11 +177,11 @@ def SReg_64 : RegisterClass<"AMDGPU", [i64, i1], 64,
> >    (add SGPR_64, VCCReg, EXECReg)
> >  >;
> >  
> > -def SReg_128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, (add SGPR_128)>;
> > +def SReg_128 : RegisterClass<"AMDGPU", [v16i8], 128, (add SGPR_128)>;
> >  
> > -def SReg_256 : RegisterClass<"AMDGPU", [v8i32], 256, (add SGPR_256)>;
> > +def SReg_256 : RegisterClass<"AMDGPU", [v32i8], 256, (add SGPR_256)>;
> >  
> > -def SReg_512 : RegisterClass<"AMDGPU", [v16i32], 512, (add SGPR_512)>;
> > +def SReg_512 : RegisterClass<"AMDGPU", [v64i8], 512, (add SGPR_512)>;
> >  
> >  // Register class for all vector registers (VGPRs + Interploation Registers)
> >  def VReg_32 : RegisterClass<"AMDGPU", [f32, i32, v1i32], 32, (add VGPR_32)>;
> > -- 
> > 1.7.10.4
> > 
> 
> > From b0aa2b61d7b86be0cdffd25b6361a9d4fc5da234 Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Sat, 2 Mar 2013 16:33:30 +0100
> > Subject: [PATCH 3/7] R600/SI: remove shader type intrinsic
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Just encode the type as target specific attribute.
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/AMDGPUInstructions.td     |    7 -------
> >  lib/Target/R600/AMDGPUIntrinsics.td       |    2 --
> >  lib/Target/R600/R600ISelLowering.cpp      |    1 -
> >  lib/Target/R600/SIISelLowering.cpp        |    6 ------
> >  lib/Target/R600/SIMachineFunctionInfo.cpp |   18 ++++++++++++++++--
> >  lib/Target/R600/SIMachineFunctionInfo.h   |    2 ++
> >  6 files changed, 18 insertions(+), 18 deletions(-)
> > 
> > diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td
> > index 960f108..a59c775 100644
> > --- a/lib/Target/R600/AMDGPUInstructions.td
> > +++ b/lib/Target/R600/AMDGPUInstructions.td
> > @@ -132,13 +132,6 @@ class FNEG <RegisterClass rc> : AMDGPUShaderInst <
> >    [(set rc:$dst, (fneg rc:$src0))]
> >  >;
> >  
> > -def SHADER_TYPE : AMDGPUShaderInst <
> > -  (outs),
> > -  (ins i32imm:$type),
> > -  "SHADER_TYPE $type",
> > -  [(int_AMDGPU_shader_type imm:$type)]
> > ->;
> > -
> >  } // usesCustomInserter = 1
> >  
> >  multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
> > diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td
> > index 2ba2d4b..eecb25b 100644
> > --- a/lib/Target/R600/AMDGPUIntrinsics.td
> > +++ b/lib/Target/R600/AMDGPUIntrinsics.td
> > @@ -50,8 +50,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
> >    def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> >    def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> >    def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
> > -
> > -  def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
> >  }
> >  
> >  let TargetPrefix = "TGSI", isTarget = 1 in {
> > diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> > index b5c2a93..b0f2727 100644
> > --- a/lib/Target/R600/R600ISelLowering.cpp
> > +++ b/lib/Target/R600/R600ISelLowering.cpp
> > @@ -105,7 +105,6 @@ MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
> >  
> >    switch (MI->getOpcode()) {
> >    default: return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
> > -  case AMDGPU::SHADER_TYPE: break;
> >    case AMDGPU::CLAMP_R600: {
> >      MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
> >                                                     AMDGPU::MOV,
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 7aa1037..a9eca31 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -83,12 +83,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> >    default:
> >      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
> >    case AMDGPU::BRANCH: return BB;
> > -  case AMDGPU::SHADER_TYPE:
> > -    BB->getParent()->getInfo<SIMachineFunctionInfo>()->ShaderType =
> > -                                        MI->getOperand(0).getImm();
> > -    MI->eraseFromParent();
> > -    break;
> > -
> >    case AMDGPU::SI_INTERP:
> >      LowerSI_INTERP(MI, *BB, I, MRI);
> >      break;
> > diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
> > index 7e59b42..e5fbf05 100644
> > --- a/lib/Target/R600/SIMachineFunctionInfo.cpp
> > +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
> > @@ -10,11 +10,25 @@
> >  
> >  
> >  #include "SIMachineFunctionInfo.h"
> > +#include "llvm/IR/Attributes.h"
> > +#include "llvm/IR/Function.h"
> >  
> >  using namespace llvm;
> >  
> > +const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType";
> > +
> >  SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
> >    : MachineFunctionInfo(),
> >      SPIPSInputAddr(0),
> > -    ShaderType(0)
> > -  { }
> > +    ShaderType(0) {
> > +
> > +  AttributeSet Set = MF.getFunction()->getAttributes();
> > +  Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
> > +                                 ShaderTypeAttribute);
> > +
> > +  if (A.isStringAttribute()) {
> > +    StringRef Str = A.getValueAsString();
> > +    if (Str.getAsInteger(0, ShaderType))
> > +      llvm_unreachable("Can't parse shader type!");
> > +  }
> > +}
> > diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
> > index 47271f5..e2f97a4 100644
> > --- a/lib/Target/R600/SIMachineFunctionInfo.h
> > +++ b/lib/Target/R600/SIMachineFunctionInfo.h
> > @@ -23,6 +23,8 @@ namespace llvm {
> >  /// tells the hardware which interpolation parameters to load.
> >  class SIMachineFunctionInfo : public MachineFunctionInfo {
> >  public:
> > +  static const char *ShaderTypeAttribute;
> > +
> >    SIMachineFunctionInfo(const MachineFunction &MF);
> >    unsigned SPIPSInputAddr;
> >    unsigned ShaderType;
> > -- 
> > 1.7.10.4
> > 
> 
> > From 92bc687ec5bc549b8d473dade20e56868386415c Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Mon, 4 Mar 2013 12:18:03 +0100
> > Subject: [PATCH 4/7] R600/SI: add proper formal parameter handling for SI
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/AMDGPU.td              |    1 +
> >  lib/Target/R600/AMDGPUCallingConv.td   |   42 +++++++++++++
> >  lib/Target/R600/AMDGPUISelLowering.cpp |   20 +++----
> >  lib/Target/R600/AMDGPUISelLowering.h   |    9 +--
> >  lib/Target/R600/AMDILISelLowering.cpp  |    5 --
> >  lib/Target/R600/SIISelLowering.cpp     |  101 ++++++++++++++++++++++++++++++++
> >  lib/Target/R600/SIISelLowering.h       |    7 +++
> >  7 files changed, 163 insertions(+), 22 deletions(-)
> >  create mode 100644 lib/Target/R600/AMDGPUCallingConv.td
> > 
> > diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td
> > index 40f4741..1a26c77 100644
> > --- a/lib/Target/R600/AMDGPU.td
> > +++ b/lib/Target/R600/AMDGPU.td
> > @@ -38,3 +38,4 @@ include "AMDGPUInstrInfo.td"
> >  include "AMDGPUIntrinsics.td"
> >  include "AMDGPURegisterInfo.td"
> >  include "AMDGPUInstructions.td"
> > +include "AMDGPUCallingConv.td"
> > diff --git a/lib/Target/R600/AMDGPUCallingConv.td b/lib/Target/R600/AMDGPUCallingConv.td
> > new file mode 100644
> > index 0000000..45ae37e
> > --- /dev/null
> > +++ b/lib/Target/R600/AMDGPUCallingConv.td
> > @@ -0,0 +1,42 @@
> > +//===---- AMDGPUCallingConv.td - Calling Conventions for Radeon GPUs ------===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This describes the calling conventions for the AMD Radeon GPUs.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +// Inversion of CCIfInReg
> > +class CCIfNotInReg<CCAction A> : CCIf<"!ArgFlags.isInReg()", A> {}
> > +
> > +// Calling convention for SI
> > +def CC_SI : CallingConv<[
> > +
> > +  CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
> > +    SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
> > +    SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15
> > +  ]>>>,
> > +
> > +  CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
> > +    [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
> > +    [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
> > +  >>>,
> > +
> > +  CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
> > +    VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
> > +    VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
> > +    VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
> > +    VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
> > +  ]>>>
> > +
> > +]>;
> > +
> > +def CC_AMDGPU : CallingConv<[
> > +  CCIf<"State.getTarget().getSubtarget<AMDGPUSubtarget>().device()"#
> > +       "->getGeneration() == AMDGPUDeviceInfo::HD7XXX", CCDelegateTo<CC_SI>>
> > +]>;
> > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> > index 0a33264..5995b6f 100644
> > --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> > @@ -14,7 +14,10 @@
> >  //===----------------------------------------------------------------------===//
> >  
> >  #include "AMDGPUISelLowering.h"
> > +#include "AMDGPURegisterInfo.h"
> >  #include "AMDILIntrinsicInfo.h"
> > +#include "AMDGPUSubtarget.h"
> > +#include "llvm/CodeGen/CallingConvLower.h"
> >  #include "llvm/CodeGen/MachineFunction.h"
> >  #include "llvm/CodeGen/MachineRegisterInfo.h"
> >  #include "llvm/CodeGen/SelectionDAG.h"
> > @@ -22,6 +25,8 @@
> >  
> >  using namespace llvm;
> >  
> > +#include "AMDGPUGenCallingConv.inc"
> > +
> >  AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> >    TargetLowering(TM, new TargetLoweringObjectFileELF()) {
> >  
> > @@ -64,17 +69,10 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> >  // TargetLowering Callbacks
> >  //===---------------------------------------------------------------------===//
> >  
> > -SDValue AMDGPUTargetLowering::LowerFormalArguments(
> > -                                      SDValue Chain,
> > -                                      CallingConv::ID CallConv,
> > -                                      bool isVarArg,
> > -                                      const SmallVectorImpl<ISD::InputArg> &Ins,
> > -                                      DebugLoc DL, SelectionDAG &DAG,
> > -                                      SmallVectorImpl<SDValue> &InVals) const {
> > -  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
> > -    InVals.push_back(SDValue());
> > -  }
> > -  return Chain;
> > +void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
> > +                             const SmallVectorImpl<ISD::InputArg> &Ins) const {
> > +
> > +  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
> >  }
> >  
> >  SDValue AMDGPUTargetLowering::LowerReturn(
> > diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h
> > index 9e7d997..f31b646 100644
> > --- a/lib/Target/R600/AMDGPUISelLowering.h
> > +++ b/lib/Target/R600/AMDGPUISelLowering.h
> > @@ -39,15 +39,12 @@ protected:
> >    bool isHWTrueValue(SDValue Op) const;
> >    bool isHWFalseValue(SDValue Op) const;
> >  
> > +  void AnalyzeFormalArguments(CCState &State,
> > +                              const SmallVectorImpl<ISD::InputArg> &Ins) const;
> > +
> >  public:
> >    AMDGPUTargetLowering(TargetMachine &TM);
> >  
> > -  virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
> > -                             bool isVarArg,
> > -                             const SmallVectorImpl<ISD::InputArg> &Ins,
> > -                             DebugLoc DL, SelectionDAG &DAG,
> > -                             SmallVectorImpl<SDValue> &InVals) const;
> > -
> >    virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
> >                                bool isVarArg,
> >                                const SmallVectorImpl<ISD::OutputArg> &Outs,
> > diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp
> > index f65e1f3..922cac1 100644
> > --- a/lib/Target/R600/AMDILISelLowering.cpp
> > +++ b/lib/Target/R600/AMDILISelLowering.cpp
> > @@ -33,11 +33,6 @@
> >  
> >  using namespace llvm;
> >  //===----------------------------------------------------------------------===//
> > -// Calling Convention Implementation
> > -//===----------------------------------------------------------------------===//
> > -#include "AMDGPUGenCallingConv.inc"
> > -
> > -//===----------------------------------------------------------------------===//
> >  // TargetLowering Implementation Help Functions End
> >  //===----------------------------------------------------------------------===//
> >  
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index a9eca31..8b46cb0 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -18,6 +18,8 @@
> >  #include "SIInstrInfo.h"
> >  #include "SIMachineFunctionInfo.h"
> >  #include "SIRegisterInfo.h"
> > +#include "llvm/IR/Function.h"
> > +#include "llvm/CodeGen/CallingConvLower.h"
> >  #include "llvm/CodeGen/MachineInstrBuilder.h"
> >  #include "llvm/CodeGen/MachineRegisterInfo.h"
> >  #include "llvm/CodeGen/SelectionDAG.h"
> > @@ -74,6 +76,105 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> >    setTargetDAGCombine(ISD::SETCC);
> >  }
> >  
> > +SDValue SITargetLowering::LowerFormalArguments(
> > +                                      SDValue Chain,
> > +                                      CallingConv::ID CallConv,
> > +                                      bool isVarArg,
> > +                                      const SmallVectorImpl<ISD::InputArg> &Ins,
> > +                                      DebugLoc DL, SelectionDAG &DAG,
> > +                                      SmallVectorImpl<SDValue> &InVals) const {
> > +
> > +  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
> > +
> > +  MachineFunction &MF = DAG.getMachineFunction();
> > +  FunctionType *FType = MF.getFunction()->getFunctionType();
> > +
> > +  assert(CallConv == CallingConv::C);
> > +
> > +  SmallVector<ISD::InputArg, 16> Splits;
> > +  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
> > +    const ISD::InputArg &Arg = Ins[i];
> > +   
> > +    // Split vertices into their elements
> > +    if (Arg.VT.isVector()) {
> > +      ISD::InputArg NewArg = Arg;
> > +      NewArg.Flags.setSplit();
> > +      NewArg.VT = Arg.VT.getVectorElementType();
> > +
> > +      // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
> > +      // three or five element vertex only needs three or five registers,
> > +      // NOT four or eight.
> > +      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
> > +      unsigned NumElements = ParamType->getVectorNumElements();
> > +
> > +      for (unsigned j = 0; j != NumElements; ++j) {
> > +        Splits.push_back(NewArg);
> > +        NewArg.PartOffset += NewArg.VT.getStoreSize();
> > +      }
> > +
> > +    } else {
> > +      Splits.push_back(Arg);
> > +    }
> > +  }
> > +
> > +  SmallVector<CCValAssign, 16> ArgLocs;
> > +  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
> > +                 getTargetMachine(), ArgLocs, *DAG.getContext());
> > +
> > +  AnalyzeFormalArguments(CCInfo, Splits);
> > +
> > +  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
> > +
> > +    CCValAssign &VA = ArgLocs[ArgIdx++];
> > +    assert(VA.isRegLoc() && "Parameter must be in a register!");
> > +
> > +    unsigned Reg = VA.getLocReg();
> > +    MVT VT = VA.getLocVT();
> > +
> > +    if (VT == MVT::i64) {
> > +      // For now assume it is a pointer
> > +      Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
> > +                                     &AMDGPU::SReg_64RegClass);
> > +      Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
> > +      InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
> > +      continue;
> > +    }
> > +
> > +    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
> > +
> > +    Reg = MF.addLiveIn(Reg, RC);
> > +    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
> > +
> > +    const ISD::InputArg &Arg = Ins[i];
> > +    if (Arg.VT.isVector()) {
> > +
> > +      // Build a vector from the registers
> > +      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
> > +      unsigned NumElements = ParamType->getVectorNumElements();
> > +
> > +      SmallVector<SDValue, 4> Regs;
> > +      Regs.push_back(Val);
> > +      for (unsigned j = 1; j != NumElements; ++j) {
> > +        Reg = ArgLocs[ArgIdx++].getLocReg();
> > +        Reg = MF.addLiveIn(Reg, RC);
> > +        Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
> > +      }
> > +
> > +      // Fill up the missing vector elements
> > +      NumElements = Arg.VT.getVectorNumElements() - NumElements;
> > +      for (unsigned j = 0; j != NumElements; ++j)
> > +        Regs.push_back(DAG.getUNDEF(VT));
> > + 
> > +      InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT,
> > +                                   Regs.data(), Regs.size()));
> > +      continue;
> > +    }
> > +
> > +    InVals.push_back(Val);
> > +  }
> > +  return Chain;
> > +}
> > +
> >  MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> >      MachineInstr * MI, MachineBasicBlock * BB) const {
> >    MachineRegisterInfo & MRI = BB->getParent()->getRegInfo();
> > diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> > index 737162f..6d77d88 100644
> > --- a/lib/Target/R600/SIISelLowering.h
> > +++ b/lib/Target/R600/SIISelLowering.h
> > @@ -43,6 +43,13 @@ class SITargetLowering : public AMDGPUTargetLowering {
> >  
> >  public:
> >    SITargetLowering(TargetMachine &tm);
> > +
> > +  SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
> > +                               bool isVarArg,
> > +                               const SmallVectorImpl<ISD::InputArg> &Ins,
> > +                               DebugLoc DL, SelectionDAG &DAG,
> > +                               SmallVectorImpl<SDValue> &InVals) const;
> > +
> >    virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
> >                                                MachineBasicBlock * BB) const;
> >    virtual EVT getSetCCResultType(EVT VT) const;
> > -- 
> > 1.7.10.4
> > 
> 
> > From 942c4da4c81157f21c37a031231faa809647470c Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Sat, 2 Mar 2013 16:40:22 +0100
> > Subject: [PATCH 5/7] R600/SI: remove SGPR address space
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/AMDIL.h            |   35 +++++++++++++--------------
> >  lib/Target/R600/SIISelLowering.cpp |   47 ------------------------------------
> >  lib/Target/R600/SIISelLowering.h   |    1 -
> >  lib/Target/R600/SIInstructions.td  |    5 ++--
> >  4 files changed, 19 insertions(+), 69 deletions(-)
> > 
> > diff --git a/lib/Target/R600/AMDIL.h b/lib/Target/R600/AMDIL.h
> > index b39fbdb..39ab664 100644
> > --- a/lib/Target/R600/AMDIL.h
> > +++ b/lib/Target/R600/AMDIL.h
> > @@ -96,24 +96,23 @@ enum AddressSpaces {
> >    ADDRESS_NONE     = 5, ///< Address space for unknown memory.
> >    PARAM_D_ADDRESS  = 6, ///< Address space for direct addressible parameter memory (CONST0)
> >    PARAM_I_ADDRESS  = 7, ///< Address space for indirect addressible parameter memory (VTX1)
> > -  USER_SGPR_ADDRESS = 8, ///< Address space for USER_SGPRS on SI
> > -  CONSTANT_BUFFER_0 = 9,
> > -  CONSTANT_BUFFER_1 = 10,
> > -  CONSTANT_BUFFER_2 = 11,
> > -  CONSTANT_BUFFER_3 = 12,
> > -  CONSTANT_BUFFER_4 = 13,
> > -  CONSTANT_BUFFER_5 = 14,
> > -  CONSTANT_BUFFER_6 = 15,
> > -  CONSTANT_BUFFER_7 = 16,
> > -  CONSTANT_BUFFER_8 = 17,
> > -  CONSTANT_BUFFER_9 = 18,
> > -  CONSTANT_BUFFER_10 = 19,
> > -  CONSTANT_BUFFER_11 = 20,
> > -  CONSTANT_BUFFER_12 = 21,
> > -  CONSTANT_BUFFER_13 = 22,
> > -  CONSTANT_BUFFER_14 = 23,
> > -  CONSTANT_BUFFER_15 = 24,
> > -  LAST_ADDRESS     = 25
> > +  CONSTANT_BUFFER_0 = 8,
> > +  CONSTANT_BUFFER_1 = 9,
> > +  CONSTANT_BUFFER_2 = 10,
> > +  CONSTANT_BUFFER_3 = 11,
> > +  CONSTANT_BUFFER_4 = 12,
> > +  CONSTANT_BUFFER_5 = 13,
> > +  CONSTANT_BUFFER_6 = 14,
> > +  CONSTANT_BUFFER_7 = 15,
> > +  CONSTANT_BUFFER_8 = 16,
> > +  CONSTANT_BUFFER_9 = 17,
> > +  CONSTANT_BUFFER_10 = 18,
> > +  CONSTANT_BUFFER_11 = 19,
> > +  CONSTANT_BUFFER_12 = 20,
> > +  CONSTANT_BUFFER_13 = 21,
> > +  CONSTANT_BUFFER_14 = 22,
> > +  CONSTANT_BUFFER_15 = 23,
> > +  LAST_ADDRESS     = 24
> >  };
> >  
> >  } // namespace AMDGPUAS
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 8b46cb0..a65e06f 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -62,11 +62,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> >  
> >    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> >  
> > -  // We need to custom lower loads from the USER_SGPR address space, so we can
> > -  // add the SGPRs as livein registers.
> > -  setOperationAction(ISD::LOAD, MVT::i32, Custom);
> > -  setOperationAction(ISD::LOAD, MVT::i64, Custom);
> > -
> >    setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
> >    setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
> >  
> > @@ -245,7 +240,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> >    switch (Op.getOpcode()) {
> >    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
> >    case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> > -  case ISD::LOAD: return LowerLOAD(Op, DAG);
> >    case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> >    case ISD::INTRINSIC_WO_CHAIN: {
> >      unsigned IntrinsicID =
> > @@ -357,47 +351,6 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
> >    return Chain;
> >  }
> >  
> > -SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
> > -  EVT VT = Op.getValueType();
> > -  LoadSDNode *Ptr = dyn_cast<LoadSDNode>(Op);
> > -
> > -  assert(Ptr);
> > -
> > -  unsigned AddrSpace = Ptr->getPointerInfo().getAddrSpace();
> > -
> > -  // We only need to lower USER_SGPR address space loads
> > -  if (AddrSpace != AMDGPUAS::USER_SGPR_ADDRESS) {
> > -    return SDValue();
> > -  }
> > -
> > -  // Loads from the USER_SGPR address space can only have constant value
> > -  // pointers.
> > -  ConstantSDNode *BasePtr = dyn_cast<ConstantSDNode>(Ptr->getBasePtr());
> > -  assert(BasePtr);
> > -
> > -  unsigned TypeDwordWidth = VT.getSizeInBits() / 32;
> > -  const TargetRegisterClass * dstClass;
> > -  switch (TypeDwordWidth) {
> > -    default:
> > -      assert(!"USER_SGPR value size not implemented");
> > -      return SDValue();
> > -    case 1:
> > -      dstClass = &AMDGPU::SReg_32RegClass;
> > -      break;
> > -    case 2:
> > -      dstClass = &AMDGPU::SReg_64RegClass;
> > -      break;
> > -  }
> > -  uint64_t Index = BasePtr->getZExtValue();
> > -  assert(Index % TypeDwordWidth == 0 && "USER_SGPR not properly aligned");
> > -  unsigned SGPRIndex = Index / TypeDwordWidth;
> > -  unsigned Reg = dstClass->getRegister(SGPRIndex);
> > -
> > -  DAG.ReplaceAllUsesOfValueWith(Op, CreateLiveInRegister(DAG, dstClass, Reg,
> > -                                                         VT));
> > -  return SDValue();
> > -}
> > -
> >  SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
> >    SDValue LHS = Op.getOperand(0);
> >    SDValue RHS = Op.getOperand(1);
> > diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> > index 6d77d88..8ae1400 100644
> > --- a/lib/Target/R600/SIISelLowering.h
> > +++ b/lib/Target/R600/SIISelLowering.h
> > @@ -31,7 +31,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
> >    void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
> >                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
> >  
> > -  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
> >    SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
> >    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
> >  
> > diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> > index bc6d604..15a3a98 100644
> > --- a/lib/Target/R600/SIInstructions.td
> > +++ b/lib/Target/R600/SIInstructions.td
> > @@ -1337,9 +1337,8 @@ def : Pat <
> >  /********** ===================== **********/
> >  
> >  def : Pat <
> > -  (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, SReg_32:$params),
> > -  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr,
> > -                    (S_MOV_B32 SReg_32:$params))
> > +  (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
> > +  (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
> >  >;
> >  
> >  def : Pat <
> > -- 
> > 1.7.10.4
> > 
> 
> > From fa0ee67efe2f38eef47579fe9a55ce03f08dec4e Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Tue, 5 Mar 2013 11:12:42 +0100
> > Subject: [PATCH 6/7] R600/SI: remove SI_vs_load_buffer_index
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/SIISelLowering.cpp |   14 --------------
> >  lib/Target/R600/SIIntrinsics.td    |    1 -
> >  2 files changed, 15 deletions(-)
> > 
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index a65e06f..34ab229 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -60,8 +60,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> >    setOperationAction(ISD::ADD, MVT::i64, Legal);
> >    setOperationAction(ISD::ADD, MVT::i32, Legal);
> >  
> > -  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> > -
> >    setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
> >    setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
> >  
> > @@ -241,18 +239,6 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
> >    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
> >    case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> >    case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
> > -  case ISD::INTRINSIC_WO_CHAIN: {
> > -    unsigned IntrinsicID =
> > -                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> > -    EVT VT = Op.getValueType();
> > -    switch (IntrinsicID) {
> > -    case AMDGPUIntrinsic::SI_vs_load_buffer_index:
> > -      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
> > -                                  AMDGPU::VGPR0, VT);
> > -    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
> > -    }
> > -    break;
> > -  }
> >    }
> >    return SDValue();
> >  }
> > diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> > index 04308d8..aa17510 100644
> > --- a/lib/Target/R600/SIIntrinsics.td
> > +++ b/lib/Target/R600/SIIntrinsics.td
> > @@ -18,7 +18,6 @@ let TargetPrefix = "SI", isTarget = 1 in {
> >    def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
> >    /* XXX: We may need a seperate intrinsic here for loading integer values */
> >    def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_i64_ty, llvm_i32_ty], []>;
> > -  def int_SI_vs_load_buffer_index : Intrinsic <[llvm_i32_ty], [], [IntrNoMem]>;
> >    def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_v16i8_ty, llvm_i16_ty, llvm_i32_ty], [IntrReadMem]> ;
> >    def int_SI_wqm : Intrinsic <[], [], []>;
> >  
> > -- 
> > 1.7.10.4
> > 
> 
> > From 87e1c8a14288c05f435e180d6ab52a37c075d3ef Mon Sep 17 00:00:00 2001
> > From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig at amd.com>
> > Date: Sat, 2 Mar 2013 18:47:20 +0100
> > Subject: [PATCH 7/7] R600/SI: rework input interpolation
> > MIME-Version: 1.0
> > Content-Type: text/plain; charset=UTF-8
> > Content-Transfer-Encoding: 8bit
> > 
> > Signed-off-by: Christian König <christian.koenig at amd.com>
> > ---
> >  lib/Target/R600/AMDGPU.h                  |    1 -
> >  lib/Target/R600/AMDGPUAsmPrinter.cpp      |    2 +-
> >  lib/Target/R600/AMDGPUTargetMachine.cpp   |    5 -
> >  lib/Target/R600/SIAssignInterpRegs.cpp    |  152 -----------------------------
> >  lib/Target/R600/SIISelLowering.cpp        |   69 +++++++------
> >  lib/Target/R600/SIISelLowering.h          |    4 -
> >  lib/Target/R600/SIInstructions.td         |   67 ++-----------
> >  lib/Target/R600/SIIntrinsics.td           |   13 +--
> >  lib/Target/R600/SIMachineFunctionInfo.cpp |    4 +-
> >  lib/Target/R600/SIMachineFunctionInfo.h   |    2 +-
> >  lib/Target/R600/SIRegisterInfo.td         |   47 +--------
> >  11 files changed, 46 insertions(+), 320 deletions(-)
> >  delete mode 100644 lib/Target/R600/SIAssignInterpRegs.cpp
> > 
> > diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> > index ba87918..0eb7b94 100644
> > --- a/lib/Target/R600/AMDGPU.h
> > +++ b/lib/Target/R600/AMDGPU.h
> > @@ -27,7 +27,6 @@ FunctionPass *createR600LowerConstCopy(TargetMachine &tm);
> >  
> >  // SI Passes
> >  FunctionPass *createSIAnnotateControlFlowPass();
> > -FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
> >  FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
> >  FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
> >  FunctionPass *createSIInsertWaits(TargetMachine &tm);
> > diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> > index c30dbe4..f600144 100644
> > --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp
> > +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp
> > @@ -141,5 +141,5 @@ void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
> >    SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
> >    OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
> >    OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
> > -  OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
> > +  OutStreamer.EmitIntValue(MFI->PSInputAddr, 4);
> >  }
> > diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> > index e2f00be..31cd9b5 100644
> > --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> > +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> > @@ -112,11 +112,6 @@ bool AMDGPUPassConfig::addInstSelector() {
> >  }
> >  
> >  bool AMDGPUPassConfig::addPreRegAlloc() {
> > -  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> > -
> > -  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
> > -    addPass(createSIAssignInterpRegsPass(*TM));
> > -  }
> >    addPass(createAMDGPUConvertToISAPass(*TM));
> >    return false;
> >  }
> > diff --git a/lib/Target/R600/SIAssignInterpRegs.cpp b/lib/Target/R600/SIAssignInterpRegs.cpp
> > deleted file mode 100644
> > index 832e44d..0000000
> > --- a/lib/Target/R600/SIAssignInterpRegs.cpp
> > +++ /dev/null
> > @@ -1,152 +0,0 @@
> > -//===-- SIAssignInterpRegs.cpp - Assign interpolation registers -----------===//
> > -//
> > -//                     The LLVM Compiler Infrastructure
> > -//
> > -// This file is distributed under the University of Illinois Open Source
> > -// License. See LICENSE.TXT for details.
> > -//
> > -//===----------------------------------------------------------------------===//
> > -//
> > -/// \file
> > -/// \brief This pass maps the pseudo interpolation registers to the correct physical
> > -/// registers.
> > -//
> > -/// Prior to executing a fragment shader, the GPU loads interpolation
> > -/// parameters into physical registers.  The specific physical register that each
> > -/// interpolation parameter ends up in depends on the type of the interpolation
> > -/// parameter as well as how many interpolation parameters are used by the
> > -/// shader.
> > -//
> > -//===----------------------------------------------------------------------===//
> > -
> > -
> > -
> > -#include "AMDGPU.h"
> > -#include "AMDIL.h"
> > -#include "SIMachineFunctionInfo.h"
> > -#include "llvm/CodeGen/MachineFunctionPass.h"
> > -#include "llvm/CodeGen/MachineInstrBuilder.h"
> > -#include "llvm/CodeGen/MachineRegisterInfo.h"
> > -
> > -using namespace llvm;
> > -
> > -namespace {
> > -
> > -class SIAssignInterpRegsPass : public MachineFunctionPass {
> > -
> > -private:
> > -  static char ID;
> > -  TargetMachine &TM;
> > -
> > -  void addLiveIn(MachineFunction * MF,  MachineRegisterInfo & MRI,
> > -                 unsigned physReg, unsigned virtReg);
> > -
> > -public:
> > -  SIAssignInterpRegsPass(TargetMachine &tm) :
> > -    MachineFunctionPass(ID), TM(tm) { }
> > -
> > -  virtual bool runOnMachineFunction(MachineFunction &MF);
> > -
> > -  const char *getPassName() const { return "SI Assign intrpolation registers"; }
> > -};
> > -
> > -} // End anonymous namespace
> > -
> > -char SIAssignInterpRegsPass::ID = 0;
> > -
> > -#define INTERP_VALUES 16
> > -#define REQUIRED_VALUE_MAX_INDEX 7
> > -
> > -struct InterpInfo {
> > -  bool Enabled;
> > -  unsigned Regs[3];
> > -  unsigned RegCount;
> > -};
> > -
> > -
> > -FunctionPass *llvm::createSIAssignInterpRegsPass(TargetMachine &tm) {
> > -  return new SIAssignInterpRegsPass(tm);
> > -}
> > -
> > -bool SIAssignInterpRegsPass::runOnMachineFunction(MachineFunction &MF) {
> > -
> > -  struct InterpInfo InterpUse[INTERP_VALUES] = {
> > -    {false, {AMDGPU::PERSP_SAMPLE_I, AMDGPU::PERSP_SAMPLE_J}, 2},
> > -    {false, {AMDGPU::PERSP_CENTER_I, AMDGPU::PERSP_CENTER_J}, 2},
> > -    {false, {AMDGPU::PERSP_CENTROID_I, AMDGPU::PERSP_CENTROID_J}, 2},
> > -    {false, {AMDGPU::PERSP_I_W, AMDGPU::PERSP_J_W, AMDGPU::PERSP_1_W}, 3},
> > -    {false, {AMDGPU::LINEAR_SAMPLE_I, AMDGPU::LINEAR_SAMPLE_J}, 2},
> > -    {false, {AMDGPU::LINEAR_CENTER_I, AMDGPU::LINEAR_CENTER_J}, 2},
> > -    {false, {AMDGPU::LINEAR_CENTROID_I, AMDGPU::LINEAR_CENTROID_J}, 2},
> > -    {false, {AMDGPU::LINE_STIPPLE_TEX_COORD}, 1},
> > -    {false, {AMDGPU::POS_X_FLOAT}, 1},
> > -    {false, {AMDGPU::POS_Y_FLOAT}, 1},
> > -    {false, {AMDGPU::POS_Z_FLOAT}, 1},
> > -    {false, {AMDGPU::POS_W_FLOAT}, 1},
> > -    {false, {AMDGPU::FRONT_FACE}, 1},
> > -    {false, {AMDGPU::ANCILLARY}, 1},
> > -    {false, {AMDGPU::SAMPLE_COVERAGE}, 1},
> > -    {false, {AMDGPU::POS_FIXED_PT}, 1}
> > -  };
> > -
> > -  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
> > -  // This pass is only needed for pixel shaders.
> > -  if (MFI->ShaderType != ShaderType::PIXEL) {
> > -    return false;
> > -  }
> > -  MachineRegisterInfo &MRI = MF.getRegInfo();
> > -  bool ForceEnable = true;
> > -
> > -  // First pass, mark the interpolation values that are used.
> > -  for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
> > -    for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
> > -                                                               RegIdx++) {
> > -      InterpUse[InterpIdx].Enabled = InterpUse[InterpIdx].Enabled ||
> > -                            !MRI.use_empty(InterpUse[InterpIdx].Regs[RegIdx]);
> > -      if (InterpUse[InterpIdx].Enabled &&
> > -          InterpIdx <= REQUIRED_VALUE_MAX_INDEX) {
> > -        ForceEnable = false;
> > -      }
> > -    }
> > -  }
> > -
> > -  // At least one interpolation mode must be enabled or else the GPU will hang.
> > -  if (ForceEnable) {
> > -    InterpUse[0].Enabled = true;
> > -  }
> > -
> > -  unsigned UsedVgprs = 0;
> > -
> > -  // Second pass, replace with VGPRs.
> > -  for (unsigned InterpIdx = 0; InterpIdx < INTERP_VALUES; InterpIdx++) {
> > -    if (!InterpUse[InterpIdx].Enabled) {
> > -      continue;
> > -    }
> > -    MFI->SPIPSInputAddr |= (1 << InterpIdx);
> > -
> > -    for (unsigned RegIdx = 0; RegIdx < InterpUse[InterpIdx].RegCount;
> > -                                                  RegIdx++, UsedVgprs++) {
> > -      unsigned NewReg = AMDGPU::VReg_32RegClass.getRegister(UsedVgprs);
> > -      unsigned VirtReg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> > -      MRI.replaceRegWith(InterpUse[InterpIdx].Regs[RegIdx], VirtReg);
> > -      addLiveIn(&MF, MRI, NewReg, VirtReg);
> > -    }
> > -  }
> > -
> > -  return false;
> > -}
> > -
> > -void SIAssignInterpRegsPass::addLiveIn(MachineFunction * MF,
> > -                           MachineRegisterInfo & MRI,
> > -                           unsigned physReg, unsigned virtReg) {
> > -    const TargetInstrInfo * TII = TM.getInstrInfo();
> > -    if (!MRI.isLiveIn(physReg)) {
> > -      MRI.addLiveIn(physReg, virtReg);
> > -      MF->front().addLiveIn(physReg);
> > -      BuildMI(MF->front(), MF->front().begin(), DebugLoc(),
> > -              TII->get(TargetOpcode::COPY), virtReg)
> > -                .addReg(physReg);
> > -    } else {
> > -      MRI.replaceRegWith(virtReg, MRI.getLiveInVirtReg(physReg));
> > -    }
> > -}
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index 34ab229..fead115 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -14,6 +14,7 @@
> >  
> >  #include "SIISelLowering.h"
> >  #include "AMDIL.h"
> > +#include "AMDGPU.h"
> >  #include "AMDILIntrinsicInfo.h"
> >  #include "SIInstrInfo.h"
> >  #include "SIMachineFunctionInfo.h"
> > @@ -81,14 +82,32 @@ SDValue SITargetLowering::LowerFormalArguments(
> >  
> >    MachineFunction &MF = DAG.getMachineFunction();
> >    FunctionType *FType = MF.getFunction()->getFunctionType();
> > +  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
> >  
> >    assert(CallConv == CallingConv::C);
> >  
> >    SmallVector<ISD::InputArg, 16> Splits;
> > -  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
> > +  uint32_t Skipped = 0;
> > +
> > +  for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
> >      const ISD::InputArg &Arg = Ins[i];
> >     
> > -    // Split vertices into their elements
> > +    // First check if it's a PS input addr 
> > +    if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg()) {
> > +
> > +      assert((PSInputNum <= 15) && "Too many PS inputs!");
> > +
> > +      if (!Arg.Used) {
> > +        // We can safely skip PS inputs
> > +        Skipped |= 1 << i;
> > +        ++PSInputNum;
> > +        continue;
> > +      }
> > +
> > +      Info->PSInputAddr |= 1 << PSInputNum++;
> > +    }
> > +
> > +    // Second split vertices into their elements
> >      if (Arg.VT.isVector()) {
> >        ISD::InputArg NewArg = Arg;
> >        NewArg.Flags.setSplit();
> > @@ -114,10 +133,22 @@ SDValue SITargetLowering::LowerFormalArguments(
> >    CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
> >                   getTargetMachine(), ArgLocs, *DAG.getContext());
> >  
> > +  // At least one interpolation mode must be enabled or else the GPU will hang.
> > +  if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
> > +    Info->PSInputAddr |= 1;
> > +    CCInfo.AllocateReg(AMDGPU::VGPR0);
> > +    CCInfo.AllocateReg(AMDGPU::VGPR1);
> > +  }
> > +
> >    AnalyzeFormalArguments(CCInfo, Splits);
> >  
> >    for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
> >  
> > +    if (Skipped & (1 << i)) {
> > +      InVals.push_back(SDValue());
> > +      continue;
> > +    }
> > +
> >      CCValAssign &VA = ArgLocs[ArgIdx++];
> >      assert(VA.isRegLoc() && "Parameter must be in a register!");
> >  
> > @@ -177,9 +208,6 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
> >    default:
> >      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
> >    case AMDGPU::BRANCH: return BB;
> > -  case AMDGPU::SI_INTERP:
> > -    LowerSI_INTERP(MI, *BB, I, MRI);
> > -    break;
> >    case AMDGPU::SI_WQM:
> >      LowerSI_WQM(MI, *BB, I, MRI);
> >      break;
> > @@ -195,37 +223,6 @@ void SITargetLowering::LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
> >    MI->eraseFromParent();
> >  }
> >  
> > -void SITargetLowering::LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
> > -    MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const {
> > -  unsigned tmp = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
> > -  unsigned M0 = MRI.createVirtualRegister(&AMDGPU::M0RegRegClass);
> > -  MachineOperand dst = MI->getOperand(0);
> > -  MachineOperand iReg = MI->getOperand(1);
> > -  MachineOperand jReg = MI->getOperand(2);
> > -  MachineOperand attr_chan = MI->getOperand(3);
> > -  MachineOperand attr = MI->getOperand(4);
> > -  MachineOperand params = MI->getOperand(5);
> > -
> > -  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::S_MOV_B32), M0)
> > -          .addOperand(params);
> > -
> > -  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P1_F32), tmp)
> > -          .addOperand(iReg)
> > -          .addOperand(attr_chan)
> > -          .addOperand(attr)
> > -          .addReg(M0);
> > -
> > -  BuildMI(BB, I, BB.findDebugLoc(I), TII->get(AMDGPU::V_INTERP_P2_F32))
> > -          .addOperand(dst)
> > -          .addReg(tmp)
> > -          .addOperand(jReg)
> > -          .addOperand(attr_chan)
> > -          .addOperand(attr)
> > -          .addReg(M0);
> > -
> > -  MI->eraseFromParent();
> > -}
> > -
> >  EVT SITargetLowering::getSetCCResultType(EVT VT) const {
> >    return MVT::i1;
> >  }
> > diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h
> > index 8ae1400..0411565 100644
> > --- a/lib/Target/R600/SIISelLowering.h
> > +++ b/lib/Target/R600/SIISelLowering.h
> > @@ -24,10 +24,6 @@ class SITargetLowering : public AMDGPUTargetLowering {
> >    const SIInstrInfo * TII;
> >    const TargetRegisterInfo * TRI;
> >  
> > -  void LowerMOV_IMM(MachineInstr *MI, MachineBasicBlock &BB,
> > -              MachineBasicBlock::iterator I, unsigned Opocde) const;
> > -  void LowerSI_INTERP(MachineInstr *MI, MachineBasicBlock &BB,
> > -              MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
> >    void LowerSI_WQM(MachineInstr *MI, MachineBasicBlock &BB,
> >                MachineBasicBlock::iterator I, MachineRegisterInfo & MRI) const;
> >  
> > diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> > index 15a3a98..0ab9e4e 100644
> > --- a/lib/Target/R600/SIInstructions.td
> > +++ b/lib/Target/R600/SIInstructions.td
> > @@ -1044,13 +1044,6 @@ def S_ABSDIFF_I32 : SOP2_32 <0x0000002c, "S_ABSDIFF_I32", []>;
> >  
> >  let isCodeGenOnly = 1, isPseudo = 1 in {
> >  
> > -def SET_M0 : InstSI <
> > -  (outs SReg_32:$dst),
> > -  (ins i32imm:$src0),
> > -  "SET_M0 $dst, $src0",
> > -  [(set SReg_32:$dst, (int_SI_set_M0 imm:$src0))]
> > ->;
> > -
> >  def LOAD_CONST : AMDGPUShaderInst <
> >    (outs GPRF32:$dst),
> >    (ins i32imm:$src),
> > @@ -1060,13 +1053,6 @@ def LOAD_CONST : AMDGPUShaderInst <
> >  
> >  let usesCustomInserter = 1 in {
> >  
> > -def SI_INTERP : InstSI <
> > -  (outs VReg_32:$dst),
> > -  (ins VReg_32:$i, VReg_32:$j, i32imm:$attr_chan, i32imm:$attr, SReg_32:$params),
> > -  "SI_INTERP $dst, $i, $j, $attr_chan, $attr, $params",
> > -  []
> > ->;
> > -
> >  def SI_WQM : InstSI <
> >    (outs),
> >    (ins),
> > @@ -1337,57 +1323,16 @@ def : Pat <
> >  /********** ===================== **********/
> >  
> >  def : Pat <
> > -  (int_SI_fs_interp_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
> > +  (int_SI_fs_constant imm:$attr_chan, imm:$attr, M0Reg:$params),
> >    (V_INTERP_MOV_F32 INTERP.P0, imm:$attr_chan, imm:$attr, M0Reg:$params)
> >  >;
> >  
> >  def : Pat <
> > -  (int_SI_fs_interp_linear_center imm:$attr_chan, imm:$attr, SReg_32:$params),
> > -  (SI_INTERP (f32 LINEAR_CENTER_I), (f32 LINEAR_CENTER_J), imm:$attr_chan,
> > -             imm:$attr, SReg_32:$params)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_interp_linear_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
> > -  (SI_INTERP (f32 LINEAR_CENTROID_I), (f32 LINEAR_CENTROID_J), imm:$attr_chan,
> > -             imm:$attr, SReg_32:$params)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_interp_persp_center imm:$attr_chan, imm:$attr, SReg_32:$params),
> > -  (SI_INTERP (f32 PERSP_CENTER_I), (f32 PERSP_CENTER_J), imm:$attr_chan,
> > -             imm:$attr, SReg_32:$params)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_interp_persp_centroid imm:$attr_chan, imm:$attr, SReg_32:$params),
> > -  (SI_INTERP (f32 PERSP_CENTROID_I), (f32 PERSP_CENTROID_J), imm:$attr_chan,
> > -             imm:$attr, SReg_32:$params)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_read_face),
> > -  (f32 FRONT_FACE)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_read_pos 0),
> > -  (f32 POS_X_FLOAT)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_read_pos 1),
> > -  (f32 POS_Y_FLOAT)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_read_pos 2),
> > -  (f32 POS_Z_FLOAT)
> > ->;
> > -
> > -def : Pat <
> > -  (int_SI_fs_read_pos 3),
> > -  (f32 POS_W_FLOAT)
> > +  (int_SI_fs_interp imm:$attr_chan, imm:$attr, M0Reg:$params, VReg_64:$ij),
> > +  (V_INTERP_P2_F32 (V_INTERP_P1_F32 (EXTRACT_SUBREG VReg_64:$ij, sub0),
> > +                                    imm:$attr_chan, imm:$attr, M0Reg:$params),
> > +                   (EXTRACT_SUBREG VReg_64:$ij, sub1),
> > +                   imm:$attr_chan, imm:$attr, M0Reg:$params)
> >  >;
> >  
> >  /********** ================== **********/
> > diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td
> > index aa17510..7c23d17 100644
> > --- a/lib/Target/R600/SIIntrinsics.td
> > +++ b/lib/Target/R600/SIIntrinsics.td
> > @@ -29,17 +29,8 @@ let TargetPrefix = "SI", isTarget = 1 in {
> >  
> >    /* Interpolation Intrinsics */
> >  
> > -  def int_SI_set_M0 : Intrinsic <[llvm_i32_ty], [llvm_i32_ty]>;
> > -  class Interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
> > -
> > -  def int_SI_fs_interp_linear_center : Interp;
> > -  def int_SI_fs_interp_linear_centroid : Interp;
> > -  def int_SI_fs_interp_persp_center : Interp;
> > -  def int_SI_fs_interp_persp_centroid : Interp;
> > -  def int_SI_fs_interp_constant : Interp;
> > -
> > -  def int_SI_fs_read_face : Intrinsic <[llvm_float_ty], [], [IntrNoMem]>;
> > -  def int_SI_fs_read_pos : Intrinsic <[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>;
> > +  def int_SI_fs_constant : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadMem]>;
> > +  def int_SI_fs_interp : Intrinsic <[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_v2i32_ty], [IntrReadMem]>;
> >  
> >    /* Control flow Intrinsics */
> >  
> > diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp
> > index e5fbf05..1a4e4cb 100644
> > --- a/lib/Target/R600/SIMachineFunctionInfo.cpp
> > +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp
> > @@ -19,8 +19,8 @@ const char *SIMachineFunctionInfo::ShaderTypeAttribute = "ShaderType";
> >  
> >  SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
> >    : MachineFunctionInfo(),
> > -    SPIPSInputAddr(0),
> > -    ShaderType(0) {
> > +    ShaderType(0),
> > +    PSInputAddr(0) {
> >  
> >    AttributeSet Set = MF.getFunction()->getAttributes();
> >    Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
> > diff --git a/lib/Target/R600/SIMachineFunctionInfo.h b/lib/Target/R600/SIMachineFunctionInfo.h
> > index e2f97a4..91a809b 100644
> > --- a/lib/Target/R600/SIMachineFunctionInfo.h
> > +++ b/lib/Target/R600/SIMachineFunctionInfo.h
> > @@ -26,8 +26,8 @@ public:
> >    static const char *ShaderTypeAttribute;
> >  
> >    SIMachineFunctionInfo(const MachineFunction &MF);
> > -  unsigned SPIPSInputAddr;
> >    unsigned ShaderType;
> > +  unsigned PSInputAddr;
> >  };
> >  
> >  } // End namespace llvm
> > diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td
> > index c4bca2c..3dcad50 100644
> > --- a/lib/Target/R600/SIRegisterInfo.td
> > +++ b/lib/Target/R600/SIRegisterInfo.td
> > @@ -34,32 +34,6 @@ foreach Index = 0-255 in {
> >    }
> >  }
> >  
> > -// virtual Interpolation registers
> > -def PERSP_SAMPLE_I : SIReg <"PERSP_SAMPLE_I">;
> > -def PERSP_SAMPLE_J : SIReg <"PERSP_SAMPLE_J">;
> > -def PERSP_CENTER_I : SIReg <"PERSP_CENTER_I">;
> > -def PERSP_CENTER_J : SIReg <"PERSP_CENTER_J">;
> > -def PERSP_CENTROID_I : SIReg <"PERSP_CENTROID_I">;
> > -def PERSP_CENTROID_J : SIReg <"PERP_CENTROID_J">;
> > -def PERSP_I_W : SIReg <"PERSP_I_W">;
> > -def PERSP_J_W : SIReg <"PERSP_J_W">;
> > -def PERSP_1_W : SIReg <"PERSP_1_W">;
> > -def LINEAR_SAMPLE_I : SIReg <"LINEAR_SAMPLE_I">;
> > -def LINEAR_SAMPLE_J : SIReg <"LINEAR_SAMPLE_J">;
> > -def LINEAR_CENTER_I : SIReg <"LINEAR_CENTER_I">;
> > -def LINEAR_CENTER_J : SIReg <"LINEAR_CENTER_J">;
> > -def LINEAR_CENTROID_I : SIReg <"LINEAR_CENTROID_I">;
> > -def LINEAR_CENTROID_J : SIReg <"LINEAR_CENTROID_J">;
> > -def LINE_STIPPLE_TEX_COORD : SIReg <"LINE_STIPPLE_TEX_COORD">;
> > -def POS_X_FLOAT : SIReg <"POS_X_FLOAT">;
> > -def POS_Y_FLOAT : SIReg <"POS_Y_FLOAT">;
> > -def POS_Z_FLOAT : SIReg <"POS_Z_FLOAT">;
> > -def POS_W_FLOAT : SIReg <"POS_W_FLOAT">;
> > -def FRONT_FACE : SIReg <"FRONT_FACE">;
> > -def ANCILLARY : SIReg <"ANCILLARY">;
> > -def SAMPLE_COVERAGE : SIReg <"SAMPLE_COVERAGE">;
> > -def POS_FIXED_PT : SIReg <"POS_FIXED_PT">;
> > -
> >  //===----------------------------------------------------------------------===//
> >  //  Groupings using register classes and tuples
> >  //===----------------------------------------------------------------------===//
> > @@ -202,26 +176,7 @@ def SSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add SReg_32)>;
> >  
> >  def SSrc_64 : RegisterClass<"AMDGPU", [i64, i1], 64, (add SReg_64)>;
> >  
> > -def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
> > -  (add VReg_32, SReg_32,
> > -    PERSP_SAMPLE_I, PERSP_SAMPLE_J,
> > -    PERSP_CENTER_I, PERSP_CENTER_J,
> > -    PERSP_CENTROID_I, PERSP_CENTROID_J,
> > -    PERSP_I_W, PERSP_J_W, PERSP_1_W,
> > -    LINEAR_SAMPLE_I, LINEAR_SAMPLE_J,
> > -    LINEAR_CENTER_I, LINEAR_CENTER_J,
> > -    LINEAR_CENTROID_I, LINEAR_CENTROID_J,
> > -    LINE_STIPPLE_TEX_COORD,
> > -    POS_X_FLOAT,
> > -    POS_Y_FLOAT,
> > -    POS_Z_FLOAT,
> > -    POS_W_FLOAT,
> > -    FRONT_FACE,
> > -    ANCILLARY,
> > -    SAMPLE_COVERAGE,
> > -    POS_FIXED_PT
> > -  )
> > ->;
> > +def VSrc_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VReg_32, SReg_32)>;
> >  
> >  def VSrc_64 : RegisterClass<"AMDGPU", [i64], 64, (add VReg_64, SReg_64)>;
> >  
> > -- 
> > 1.7.10.4
> > 
> 
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits




More information about the llvm-commits mailing list