[llvm-commits] [llvm] r169915 - in /llvm/trunk: include/llvm/ lib/Target/ lib/Target/R600/ lib/Target/R600/InstPrinter/ lib/Target/R600/MCTargetDesc/ lib/Target/R600/TargetInfo/ test/CodeGen/R600/ test/CodeGen/SI/

Sean Silva silvas at purdue.edu
Tue Dec 11 16:35:29 PST 2012


All that patience finally paid off. Congrats!

-- Sean Silva


On Tue, Dec 11, 2012 at 4:25 PM, Tom Stellard <thomas.stellard at amd.com>wrote:

> Author: tstellar
> Date: Tue Dec 11 15:25:42 2012
> New Revision: 169915
>
> URL: http://llvm.org/viewvc/llvm-project?rev=169915&view=rev
> Log:
> Add R600 backend
>
> A new backend supporting AMD GPUs: Radeon HD2XXX - HD7XXX
>
> Added:
>     llvm/trunk/include/llvm/IntrinsicsR600.td
>     llvm/trunk/lib/Target/R600/
>     llvm/trunk/lib/Target/R600/AMDGPU.h
>     llvm/trunk/lib/Target/R600/AMDGPU.td
>     llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.h
>     llvm/trunk/lib/Target/R600/AMDGPUCodeEmitter.h
>     llvm/trunk/lib/Target/R600/AMDGPUConvertToISA.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
>     llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.h
>     llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td
>     llvm/trunk/lib/Target/R600/AMDGPUInstructions.td
>     llvm/trunk/lib/Target/R600/AMDGPUIntrinsics.td
>     llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.h
>     llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.cpp
>     llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.h
>     llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.td
>     llvm/trunk/lib/Target/R600/AMDGPUSubtarget.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h
>     llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
>     llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.h
>     llvm/trunk/lib/Target/R600/AMDIL.h
>     llvm/trunk/lib/Target/R600/AMDIL7XXDevice.cpp
>     llvm/trunk/lib/Target/R600/AMDIL7XXDevice.h
>     llvm/trunk/lib/Target/R600/AMDILBase.td
>     llvm/trunk/lib/Target/R600/AMDILCFGStructurizer.cpp
>     llvm/trunk/lib/Target/R600/AMDILDevice.cpp
>     llvm/trunk/lib/Target/R600/AMDILDevice.h
>     llvm/trunk/lib/Target/R600/AMDILDeviceInfo.cpp
>     llvm/trunk/lib/Target/R600/AMDILDeviceInfo.h
>     llvm/trunk/lib/Target/R600/AMDILDevices.h
>     llvm/trunk/lib/Target/R600/AMDILEvergreenDevice.cpp
>     llvm/trunk/lib/Target/R600/AMDILEvergreenDevice.h
>     llvm/trunk/lib/Target/R600/AMDILFrameLowering.cpp
>     llvm/trunk/lib/Target/R600/AMDILFrameLowering.h
>     llvm/trunk/lib/Target/R600/AMDILISelDAGToDAG.cpp
>     llvm/trunk/lib/Target/R600/AMDILISelLowering.cpp
>     llvm/trunk/lib/Target/R600/AMDILInstrInfo.td
>     llvm/trunk/lib/Target/R600/AMDILIntrinsicInfo.cpp
>     llvm/trunk/lib/Target/R600/AMDILIntrinsicInfo.h
>     llvm/trunk/lib/Target/R600/AMDILIntrinsics.td
>     llvm/trunk/lib/Target/R600/AMDILNIDevice.cpp
>     llvm/trunk/lib/Target/R600/AMDILNIDevice.h
>     llvm/trunk/lib/Target/R600/AMDILPeepholeOptimizer.cpp
>     llvm/trunk/lib/Target/R600/AMDILRegisterInfo.td
>     llvm/trunk/lib/Target/R600/AMDILSIDevice.cpp
>     llvm/trunk/lib/Target/R600/AMDILSIDevice.h
>     llvm/trunk/lib/Target/R600/CMakeLists.txt
>     llvm/trunk/lib/Target/R600/InstPrinter/
>     llvm/trunk/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp
>     llvm/trunk/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h
>     llvm/trunk/lib/Target/R600/InstPrinter/CMakeLists.txt
>     llvm/trunk/lib/Target/R600/InstPrinter/LLVMBuild.txt
>     llvm/trunk/lib/Target/R600/InstPrinter/Makefile
>     llvm/trunk/lib/Target/R600/LLVMBuild.txt
>     llvm/trunk/lib/Target/R600/MCTargetDesc/
>     llvm/trunk/lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp
>     llvm/trunk/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp
>     llvm/trunk/lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h
>     llvm/trunk/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h
>     llvm/trunk/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp
>     llvm/trunk/lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h
>     llvm/trunk/lib/Target/R600/MCTargetDesc/CMakeLists.txt
>     llvm/trunk/lib/Target/R600/MCTargetDesc/LLVMBuild.txt
>     llvm/trunk/lib/Target/R600/MCTargetDesc/Makefile
>     llvm/trunk/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp
>     llvm/trunk/lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp
>     llvm/trunk/lib/Target/R600/Makefile
>     llvm/trunk/lib/Target/R600/Processors.td
>     llvm/trunk/lib/Target/R600/R600Defines.h
>     llvm/trunk/lib/Target/R600/R600ExpandSpecialInstrs.cpp
>     llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
>     llvm/trunk/lib/Target/R600/R600ISelLowering.h
>     llvm/trunk/lib/Target/R600/R600InstrInfo.cpp
>     llvm/trunk/lib/Target/R600/R600InstrInfo.h
>     llvm/trunk/lib/Target/R600/R600Instructions.td
>     llvm/trunk/lib/Target/R600/R600Intrinsics.td
>     llvm/trunk/lib/Target/R600/R600MachineFunctionInfo.cpp
>     llvm/trunk/lib/Target/R600/R600MachineFunctionInfo.h
>     llvm/trunk/lib/Target/R600/R600RegisterInfo.cpp
>     llvm/trunk/lib/Target/R600/R600RegisterInfo.h
>     llvm/trunk/lib/Target/R600/R600RegisterInfo.td
>     llvm/trunk/lib/Target/R600/R600Schedule.td
>     llvm/trunk/lib/Target/R600/SIAssignInterpRegs.cpp
>     llvm/trunk/lib/Target/R600/SIFixSGPRLiveness.cpp
>     llvm/trunk/lib/Target/R600/SIISelLowering.cpp
>     llvm/trunk/lib/Target/R600/SIISelLowering.h
>     llvm/trunk/lib/Target/R600/SIInstrFormats.td
>     llvm/trunk/lib/Target/R600/SIInstrInfo.cpp
>     llvm/trunk/lib/Target/R600/SIInstrInfo.h
>     llvm/trunk/lib/Target/R600/SIInstrInfo.td
>     llvm/trunk/lib/Target/R600/SIInstructions.td
>     llvm/trunk/lib/Target/R600/SIIntrinsics.td
>     llvm/trunk/lib/Target/R600/SILowerControlFlow.cpp
>     llvm/trunk/lib/Target/R600/SILowerLiteralConstants.cpp
>     llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.cpp
>     llvm/trunk/lib/Target/R600/SIMachineFunctionInfo.h
>     llvm/trunk/lib/Target/R600/SIRegisterInfo.cpp
>     llvm/trunk/lib/Target/R600/SIRegisterInfo.h
>     llvm/trunk/lib/Target/R600/SIRegisterInfo.td
>     llvm/trunk/lib/Target/R600/SISchedule.td
>     llvm/trunk/lib/Target/R600/TargetInfo/
>     llvm/trunk/lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp
>     llvm/trunk/lib/Target/R600/TargetInfo/CMakeLists.txt
>     llvm/trunk/lib/Target/R600/TargetInfo/LLVMBuild.txt
>     llvm/trunk/lib/Target/R600/TargetInfo/Makefile
>     llvm/trunk/test/CodeGen/R600/add.v4i32.ll
>     llvm/trunk/test/CodeGen/R600/and.v4i32.ll
>     llvm/trunk/test/CodeGen/R600/fabs.ll
>     llvm/trunk/test/CodeGen/R600/fadd.ll
>     llvm/trunk/test/CodeGen/R600/fadd.v4f32.ll
>     llvm/trunk/test/CodeGen/R600/fcmp-cnd.ll
>     llvm/trunk/test/CodeGen/R600/fcmp-cnde-int-args.ll
>     llvm/trunk/test/CodeGen/R600/fcmp.ll
>     llvm/trunk/test/CodeGen/R600/fdiv.v4f32.ll
>     llvm/trunk/test/CodeGen/R600/floor.ll
>     llvm/trunk/test/CodeGen/R600/fmax.ll
>     llvm/trunk/test/CodeGen/R600/fmin.ll
>     llvm/trunk/test/CodeGen/R600/fmul.ll
>     llvm/trunk/test/CodeGen/R600/fmul.v4f32.ll
>     llvm/trunk/test/CodeGen/R600/fsub.ll
>     llvm/trunk/test/CodeGen/R600/fsub.v4f32.ll
>     llvm/trunk/test/CodeGen/R600/i8_to_double_to_float.ll
>     llvm/trunk/test/CodeGen/R600/icmp-select-sete-reverse-args.ll
>     llvm/trunk/test/CodeGen/R600/lit.local.cfg
>     llvm/trunk/test/CodeGen/R600/literals.ll
>     llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.mul.ll
>     llvm/trunk/test/CodeGen/R600/llvm.AMDGPU.trunc.ll
>     llvm/trunk/test/CodeGen/R600/llvm.cos.ll
>     llvm/trunk/test/CodeGen/R600/llvm.pow.ll
>     llvm/trunk/test/CodeGen/R600/llvm.sin.ll
>     llvm/trunk/test/CodeGen/R600/load.constant_addrspace.f32.ll
>     llvm/trunk/test/CodeGen/R600/load.i8.ll
>     llvm/trunk/test/CodeGen/R600/reciprocal.ll
>     llvm/trunk/test/CodeGen/R600/sdiv.ll
>     llvm/trunk/test/CodeGen/R600/selectcc-icmp-select-float.ll
>     llvm/trunk/test/CodeGen/R600/selectcc_cnde.ll
>     llvm/trunk/test/CodeGen/R600/selectcc_cnde_int.ll
>     llvm/trunk/test/CodeGen/R600/setcc.v4i32.ll
>     llvm/trunk/test/CodeGen/R600/short-args.ll
>     llvm/trunk/test/CodeGen/R600/store.v4f32.ll
>     llvm/trunk/test/CodeGen/R600/store.v4i32.ll
>     llvm/trunk/test/CodeGen/R600/udiv.v4i32.ll
>     llvm/trunk/test/CodeGen/R600/urem.v4i32.ll
>     llvm/trunk/test/CodeGen/SI/
>     llvm/trunk/test/CodeGen/SI/sanity.ll
> Modified:
>     llvm/trunk/include/llvm/Intrinsics.td
>     llvm/trunk/lib/Target/LLVMBuild.txt
>
> Modified: llvm/trunk/include/llvm/Intrinsics.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Intrinsics.td?rev=169915&r1=169914&r2=169915&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/Intrinsics.td (original)
> +++ llvm/trunk/include/llvm/Intrinsics.td Tue Dec 11 15:25:42 2012
> @@ -472,3 +472,4 @@
>  include "llvm/IntrinsicsHexagon.td"
>  include "llvm/IntrinsicsNVVM.td"
>  include "llvm/IntrinsicsMips.td"
> +include "llvm/IntrinsicsR600.td"
>
> Added: llvm/trunk/include/llvm/IntrinsicsR600.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IntrinsicsR600.td?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IntrinsicsR600.td (added)
> +++ llvm/trunk/include/llvm/IntrinsicsR600.td Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,36 @@
> +//===- IntrinsicsR600.td - Defines R600 intrinsics ---------*- tablegen
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines all of the R600-specific intrinsics.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +let TargetPrefix = "r600" in {
> +
> +class R600ReadPreloadRegisterIntrinsic<string name>
> +  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> +    GCCBuiltin<name>;
> +
> +multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
> +  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
> +  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
> +  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
> +}
> +
> +defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
> +                                       "__builtin_r600_read_global_size">;
> +defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
> +                                       "__builtin_r600_read_local_size">;
> +defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
> +                                       "__builtin_r600_read_ngroups">;
> +defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
> +                                       "__builtin_r600_read_tgid">;
> +defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
> +                                       "__builtin_r600_read_tidig">;
> +} // End TargetPrefix = "r600"
>
> Modified: llvm/trunk/lib/Target/LLVMBuild.txt
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/LLVMBuild.txt?rev=169915&r1=169914&r2=169915&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/LLVMBuild.txt (original)
> +++ llvm/trunk/lib/Target/LLVMBuild.txt Tue Dec 11 15:25:42 2012
> @@ -16,7 +16,7 @@
>
>  ;===------------------------------------------------------------------------===;
>
>  [common]
> -subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC
> Sparc X86 XCore
> +subdirectories = ARM CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC
> R600 Sparc X86 XCore
>
>  ; This is a special group whose required libraries are extended (by
> llvm-build)
>  ; with the best execution engine (the native JIT, if available, or the
>
> Added: llvm/trunk/lib/Target/R600/AMDGPU.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPU.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPU.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,48 @@
> +//===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++
> -*-=//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +/// \file
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPU_H
> +#define AMDGPU_H
> +
> +#include "AMDGPUTargetMachine.h"
> +#include "llvm/Support/TargetRegistry.h"
> +#include "llvm/Target/TargetMachine.h"
> +
> +namespace llvm {
> +
> +class FunctionPass;
> +class AMDGPUTargetMachine;
> +
> +// R600 Passes
> +FunctionPass* createR600KernelParametersPass(const DataLayout *TD);
> +FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
> +
> +// SI Passes
> +FunctionPass *createSIAssignInterpRegsPass(TargetMachine &tm);
> +FunctionPass *createSILowerControlFlowPass(TargetMachine &tm);
> +FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
> +FunctionPass *createSILowerLiteralConstantsPass(TargetMachine &tm);
> +FunctionPass *createSIFixSGPRLivenessPass(TargetMachine &tm);
> +
> +// Passes common to R600 and SI
> +FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm);
> +
> +} // End namespace llvm
> +
> +namespace ShaderType {
> +  enum Type {
> +    PIXEL = 0,
> +    VERTEX = 1,
> +    GEOMETRY = 2,
> +    COMPUTE = 3
> +  };
> +}
> +
> +#endif // AMDGPU_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPU.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPU.td?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPU.td (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPU.td Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,40 @@
> +//===-- AMDGPU.td - AMDGPU Tablegen files --*- tablegen
> -*-------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//==-----------------------------------------------------------------------===//
> +
> +// Include AMDIL TD files
> +include "AMDILBase.td"
> +
> +
> +def AMDGPUInstrInfo : InstrInfo {
> +  let guessInstructionProperties = 1;
> +}
> +
>
> +//===----------------------------------------------------------------------===//
> +// Declare the target which we are implementing
>
> +//===----------------------------------------------------------------------===//
> +def AMDGPUAsmWriter : AsmWriter {
> +    string AsmWriterClassName = "InstPrinter";
> +    int Variant = 0;
> +    bit isMCAsmWriter = 1;
> +}
> +
> +def AMDGPU : Target {
> +  // Pull in Instruction Info:
> +  let InstructionSet = AMDGPUInstrInfo;
> +  let AssemblyWriters = [AMDGPUAsmWriter];
> +}
> +
> +// Include AMDGPU TD files
> +include "R600Schedule.td"
> +include "SISchedule.td"
> +include "Processors.td"
> +include "AMDGPUInstrInfo.td"
> +include "AMDGPUIntrinsics.td"
> +include "AMDGPURegisterInfo.td"
> +include "AMDGPUInstructions.td"
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.cpp Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,138 @@
> +//===-- AMDGPUAsmPrinter.cpp - AMDGPU Assembly printer
>  --------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +///
> +/// The AMDGPUAsmPrinter is used to print both assembly string and also
> binary
> +/// code.  When passed an MCAsmStreamer it prints assembly and when passed
> +/// an MCObjectStreamer it outputs binary code.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +
> +
> +#include "AMDGPUAsmPrinter.h"
> +#include "AMDGPU.h"
> +#include "SIMachineFunctionInfo.h"
> +#include "SIRegisterInfo.h"
> +#include "llvm/MC/MCStreamer.h"
> +#include "llvm/Target/TargetLoweringObjectFile.h"
> +#include "llvm/Support/TargetRegistry.h"
> +
> +using namespace llvm;
> +
> +
> +static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm,
> +                                              MCStreamer &Streamer) {
> +  return new AMDGPUAsmPrinter(tm, Streamer);
> +}
> +
> +extern "C" void LLVMInitializeR600AsmPrinter() {
> +  TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget,
> createAMDGPUAsmPrinterPass);
> +}
> +
> +/// We need to override this function so we can avoid
> +/// the call to EmitFunctionHeader(), which the MCPureStreamer can't
> handle.
> +bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
> +  const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
> +  if (STM.dumpCode()) {
> +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
> +    MF.dump();
> +#endif
> +  }
> +  SetupMachineFunction(MF);
> +  OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
> +  if (STM.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
> +    EmitProgramInfo(MF);
> +  }
> +  EmitFunctionBody();
> +  return false;
> +}
> +
> +void AMDGPUAsmPrinter::EmitProgramInfo(MachineFunction &MF) {
> +  unsigned MaxSGPR = 0;
> +  unsigned MaxVGPR = 0;
> +  bool VCCUsed = false;
> +  const SIRegisterInfo * RI =
> +                static_cast<const SIRegisterInfo*>(TM.getRegisterInfo());
> +
> +  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> +                                                  BB != BB_E; ++BB) {
> +    MachineBasicBlock &MBB = *BB;
> +    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> +                                                    I != E; ++I) {
> +      MachineInstr &MI = *I;
> +
> +      unsigned numOperands = MI.getNumOperands();
> +      for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
> +        MachineOperand & MO = MI.getOperand(op_idx);
> +        unsigned maxUsed;
> +        unsigned width = 0;
> +        bool isSGPR = false;
> +        unsigned reg;
> +        unsigned hwReg;
> +        if (!MO.isReg()) {
> +          continue;
> +        }
> +        reg = MO.getReg();
> +        if (reg == AMDGPU::VCC) {
> +          VCCUsed = true;
> +          continue;
> +        }
> +        switch (reg) {
> +        default: break;
> +        case AMDGPU::EXEC:
> +        case AMDGPU::SI_LITERAL_CONSTANT:
> +        case AMDGPU::SREG_LIT_0:
> +        case AMDGPU::M0:
> +          continue;
> +        }
> +
> +        if (AMDGPU::SReg_32RegClass.contains(reg)) {
> +          isSGPR = true;
> +          width = 1;
> +        } else if (AMDGPU::VReg_32RegClass.contains(reg)) {
> +          isSGPR = false;
> +          width = 1;
> +        } else if (AMDGPU::SReg_64RegClass.contains(reg)) {
> +          isSGPR = true;
> +          width = 2;
> +        } else if (AMDGPU::VReg_64RegClass.contains(reg)) {
> +          isSGPR = false;
> +          width = 2;
> +        } else if (AMDGPU::SReg_128RegClass.contains(reg)) {
> +          isSGPR = true;
> +          width = 4;
> +        } else if (AMDGPU::VReg_128RegClass.contains(reg)) {
> +          isSGPR = false;
> +          width = 4;
> +        } else if (AMDGPU::SReg_256RegClass.contains(reg)) {
> +          isSGPR = true;
> +          width = 8;
> +        } else {
> +          assert(!"Unknown register class");
> +        }
> +        hwReg = RI->getEncodingValue(reg);
> +        maxUsed = hwReg + width - 1;
> +        if (isSGPR) {
> +          MaxSGPR = maxUsed > MaxSGPR ? maxUsed : MaxSGPR;
> +        } else {
> +          MaxVGPR = maxUsed > MaxVGPR ? maxUsed : MaxVGPR;
> +        }
> +      }
> +    }
> +  }
> +  if (VCCUsed) {
> +    MaxSGPR += 2;
> +  }
> +  SIMachineFunctionInfo * MFI = MF.getInfo<SIMachineFunctionInfo>();
> +  OutStreamer.EmitIntValue(MaxSGPR + 1, 4);
> +  OutStreamer.EmitIntValue(MaxVGPR + 1, 4);
> +  OutStreamer.EmitIntValue(MFI->SPIPSInputAddr, 4);
> +}
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUAsmPrinter.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,44 @@
> +//===-- AMDGPUAsmPrinter.h - Print AMDGPU assembly code
> -------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief AMDGPU Assembly printer class.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPU_ASMPRINTER_H
> +#define AMDGPU_ASMPRINTER_H
> +
> +#include "llvm/CodeGen/AsmPrinter.h"
> +
> +namespace llvm {
> +
> +class AMDGPUAsmPrinter : public AsmPrinter {
> +
> +public:
> +  explicit AMDGPUAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
> +    : AsmPrinter(TM, Streamer) { }
> +
> +  virtual bool runOnMachineFunction(MachineFunction &MF);
> +
> +  virtual const char *getPassName() const {
> +    return "AMDGPU Assembly Printer";
> +  }
> +
> +  /// \brief Emit register usage information so that the GPU driver
> +  /// can correctly setup the GPU state.
> +  void EmitProgramInfo(MachineFunction &MF);
> +
> +  /// Implemented in AMDGPUMCInstLower.cpp
> +  virtual void EmitInstruction(const MachineInstr *MI);
> +};
> +
> +} // End namespace llvm
> +
> +#endif //AMDGPU_ASMPRINTER_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUCodeEmitter.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUCodeEmitter.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUCodeEmitter.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUCodeEmitter.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,49 @@
> +//===-- AMDGPUCodeEmitter.h - AMDGPU Code Emitter interface
> -----------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief CodeEmitter interface for R600 and SI codegen.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPUCODEEMITTER_H
> +#define AMDGPUCODEEMITTER_H
> +
> +namespace llvm {
> +
> +class AMDGPUCodeEmitter {
> +public:
> +  uint64_t getBinaryCodeForInstr(const MachineInstr &MI) const;
> +  virtual uint64_t getMachineOpValue(const MachineInstr &MI,
> +                                   const MachineOperand &MO) const {
> return 0; }
> +  virtual unsigned GPR4AlignEncode(const MachineInstr  &MI,
> +                                     unsigned OpNo) const {
> +    return 0;
> +  }
> +  virtual unsigned GPR2AlignEncode(const MachineInstr &MI,
> +                                   unsigned OpNo) const {
> +    return 0;
> +  }
> +  virtual uint64_t VOPPostEncode(const MachineInstr &MI,
> +                                 uint64_t Value) const {
> +    return Value;
> +  }
> +  virtual uint64_t i32LiteralEncode(const MachineInstr &MI,
> +                                    unsigned OpNo) const {
> +    return 0;
> +  }
> +  virtual uint32_t SMRDmemriEncode(const MachineInstr &MI, unsigned OpNo)
> +                                                                   const {
> +    return 0;
> +  }
> +};
> +
> +} // End namespace llvm
> +
> +#endif // AMDGPUCODEEMITTER_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUConvertToISA.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUConvertToISA.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUConvertToISA.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUConvertToISA.cpp Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,62 @@
> +//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA
> --------------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief This pass lowers AMDIL machine instructions to the appropriate
> +/// hardware instructions.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPU.h"
> +#include "AMDGPUInstrInfo.h"
> +#include "llvm/CodeGen/MachineFunctionPass.h"
> +
> +using namespace llvm;
> +
> +namespace {
> +
> +class AMDGPUConvertToISAPass : public MachineFunctionPass {
> +
> +private:
> +  static char ID;
> +  TargetMachine &TM;
> +
> +public:
> +  AMDGPUConvertToISAPass(TargetMachine &tm) :
> +    MachineFunctionPass(ID), TM(tm) { }
> +
> +  virtual bool runOnMachineFunction(MachineFunction &MF);
> +
> +  virtual const char *getPassName() const {return "AMDGPU Convert to
> ISA";}
> +
> +};
> +
> +} // End anonymous namespace
> +
> +char AMDGPUConvertToISAPass::ID = 0;
> +
> +FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
> +  return new AMDGPUConvertToISAPass(tm);
> +}
> +
> +bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
> +  const AMDGPUInstrInfo * TII =
> +                      static_cast<const
> AMDGPUInstrInfo*>(TM.getInstrInfo());
> +
> +  for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
> +                                                  BB != BB_E; ++BB) {
> +    MachineBasicBlock &MBB = *BB;
> +    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> +                                                      I != E; ++I) {
> +      MachineInstr &MI = *I;
> +      TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
> +    }
> +  }
> +  return false;
> +}
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,417 @@
> +//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions
> -----===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief This is the parent TargetLowering class for hardware code gen
> +/// targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPUISelLowering.h"
> +#include "AMDILIntrinsicInfo.h"
> +#include "llvm/CodeGen/MachineFunction.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +#include "llvm/CodeGen/SelectionDAG.h"
> +#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
> +
> +using namespace llvm;
> +
> +AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
> +  TargetLowering(TM, new TargetLoweringObjectFileELF()) {
> +
> +  // Initialize target lowering borrowed from AMDIL
> +  InitAMDILLowering();
> +
> +  // We need to custom lower some of the intrinsics
> +  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
> +
> +  // Library functions.  These default to Expand, but we have instructions
> +  // for them.
> +  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
> +  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
> +  setOperationAction(ISD::FPOW,   MVT::f32, Legal);
> +  setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
> +  setOperationAction(ISD::FABS,   MVT::f32, Legal);
> +  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
> +  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
> +
> +  // Lower floating point store/load to integer store/load to reduce the
> number
> +  // of patterns in tablegen.
> +  setOperationAction(ISD::STORE, MVT::f32, Promote);
> +  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
> +
> +  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
> +  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
> +
> +  setOperationAction(ISD::LOAD, MVT::f32, Promote);
> +  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
> +
> +  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
> +  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
> +
> +  setOperationAction(ISD::UDIV, MVT::i32, Expand);
> +  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
> +  setOperationAction(ISD::UREM, MVT::i32, Expand);
> +}
> +
>
> +//===---------------------------------------------------------------------===//
> +// TargetLowering Callbacks
>
> +//===---------------------------------------------------------------------===//
> +
> +SDValue AMDGPUTargetLowering::LowerFormalArguments(
> +                                      SDValue Chain,
> +                                      CallingConv::ID CallConv,
> +                                      bool isVarArg,
> +                                      const
> SmallVectorImpl<ISD::InputArg> &Ins,
> +                                      DebugLoc DL, SelectionDAG &DAG,
> +                                      SmallVectorImpl<SDValue> &InVals)
> const {
> +  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
> +    InVals.push_back(SDValue());
> +  }
> +  return Chain;
> +}
> +
> +SDValue AMDGPUTargetLowering::LowerReturn(
> +                                     SDValue Chain,
> +                                     CallingConv::ID CallConv,
> +                                     bool isVarArg,
> +                                     const
> SmallVectorImpl<ISD::OutputArg> &Outs,
> +                                     const SmallVectorImpl<SDValue>
> &OutVals,
> +                                     DebugLoc DL, SelectionDAG &DAG)
> const {
> +  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
> +}
> +
>
> +//===---------------------------------------------------------------------===//
> +// Target specific lowering
>
> +//===---------------------------------------------------------------------===//
> +
> +SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG
> &DAG)
> +    const {
> +  switch (Op.getOpcode()) {
> +  default:
> +    Op.getNode()->dump();
> +    assert(0 && "Custom lowering code for this"
> +        "instruction is not implemented yet!");
> +    break;
> +  // AMDIL DAG lowering
> +  case ISD::SDIV: return LowerSDIV(Op, DAG);
> +  case ISD::SREM: return LowerSREM(Op, DAG);
> +  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
> +  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
> +  // AMDGPU DAG lowering
> +  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
> +  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
> +  }
> +  return Op;
> +}
> +
> +SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> +    SelectionDAG &DAG) const {
> +  unsigned IntrinsicID =
> cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> +  DebugLoc DL = Op.getDebugLoc();
> +  EVT VT = Op.getValueType();
> +
> +  switch (IntrinsicID) {
> +    default: return Op;
> +    case AMDGPUIntrinsic::AMDIL_abs:
> +      return LowerIntrinsicIABS(Op, DAG);
> +    case AMDGPUIntrinsic::AMDIL_exp:
> +      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
> +    case AMDGPUIntrinsic::AMDGPU_lrp:
> +      return LowerIntrinsicLRP(Op, DAG);
> +    case AMDGPUIntrinsic::AMDIL_fraction:
> +      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
> +    case AMDGPUIntrinsic::AMDIL_mad:
> +      return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
> +                              Op.getOperand(2), Op.getOperand(3));
> +    case AMDGPUIntrinsic::AMDIL_max:
> +      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
> +                                                  Op.getOperand(2));
> +    case AMDGPUIntrinsic::AMDGPU_imax:
> +      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
> +                                                  Op.getOperand(2));
> +    case AMDGPUIntrinsic::AMDGPU_umax:
> +      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
> +                                                  Op.getOperand(2));
> +    case AMDGPUIntrinsic::AMDIL_min:
> +      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
> +                                                  Op.getOperand(2));
> +    case AMDGPUIntrinsic::AMDGPU_imin:
> +      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
> +                                                  Op.getOperand(2));
> +    case AMDGPUIntrinsic::AMDGPU_umin:
> +      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
> +                                                  Op.getOperand(2));
> +    case AMDGPUIntrinsic::AMDIL_round_nearest:
> +      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
> +  }
> +}
> +
> +///IABS(a) = SMAX(sub(0, a), a)
> +SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
> +    SelectionDAG &DAG) const {
> +
> +  DebugLoc DL = Op.getDebugLoc();
> +  EVT VT = Op.getValueType();
> +  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
> +                                              Op.getOperand(1));
> +
> +  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
> +}
> +
> +/// Linear Interpolation
> +/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
> +SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
> +    SelectionDAG &DAG) const {
> +  DebugLoc DL = Op.getDebugLoc();
> +  EVT VT = Op.getValueType();
> +  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
> +                                DAG.getConstantFP(1.0f, MVT::f32),
> +                                Op.getOperand(1));
> +  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
> +                                                    Op.getOperand(3));
> +  return DAG.getNode(AMDGPUISD::MAD, DL, VT, Op.getOperand(1),
> +                                               Op.getOperand(2),
> +                                               OneSubAC);
> +}
> +
/// \brief Generate Min/Max node
///
/// Recognizes an f32 select_cc whose true/false operands are exactly the
/// values being compared (possibly swapped) and turns it into an
/// AMDGPUISD::FMIN or FMAX node.  Returns an empty SDValue when the pattern
/// does not apply, leaving the select_cc untouched.
SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
    SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);

  // Only f32 selects between the two compared values express a min/max.
  if (VT != MVT::f32 ||
      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
    return SDValue();
  }

  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  switch (CCOpcode) {
  // Equality, inequality and (un)ordered-only tests cannot select between
  // the two compared operands the way min/max does; DAG combines are
  // expected to have eliminated these before we get here.
  case ISD::SETOEQ:
  case ISD::SETONE:
  case ISD::SETUNE:
  case ISD::SETNE:
  case ISD::SETUEQ:
  case ISD::SETEQ:
  case ISD::SETFALSE:
  case ISD::SETFALSE2:
  case ISD::SETTRUE:
  case ISD::SETTRUE2:
  case ISD::SETUO:
  case ISD::SETO:
    assert(0 && "Operation should already be optimised !");
    // NOTE(review): when asserts are compiled out (NDEBUG), control falls
    // through into the "less than" handling below -- confirm this
    // fallthrough is the intended release-mode behavior.
  // "a < b ? a : b" is FMIN; with operands swapped it is FMAX.
  case ISD::SETULE:
  case ISD::SETULT:
  case ISD::SETOLE:
  case ISD::SETOLT:
  case ISD::SETLE:
  case ISD::SETLT: {
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
  }
  // "a > b ? a : b" is FMAX; with operands swapped it is FMIN.
  case ISD::SETGT:
  case ISD::SETGE:
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETUGT:
  case ISD::SETOGT: {
    if (LHS == True)
      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
    else
      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
  }
  case ISD::SETCC_INVALID:
    assert(0 && "Invalid setcc condcode !");
  }
  return Op;
}
> +
> +
> +
/// \brief Expand 32-bit unsigned DIVREM using a hardware reciprocal
/// approximation (AMDGPUISD::URECIP) followed by correction steps.
///
/// The approximate reciprocal carries a rounding error e; the code computes
/// the error, adjusts the reciprocal, forms a candidate quotient/remainder,
/// and then nudges each by +/-1 based on two sign tests.  Returns a merged
/// {Div, Rem} value pair.
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
    SelectionDAG &DAG) const {
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();

  SDValue Num = Op.getOperand(0);
  SDValue Den = Op.getOperand(1);

  SmallVector<SDValue, 8> Results;

  // RCP =  URECIP(Den) = 2^32 / Den + e
  // e is rounding error.
  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);

  // RCP_LO = umulo(RCP, Den)
  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);

  // RCP_HI = mulhu (RCP, Den)
  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);

  // NEG_RCP_LO = -RCP_LO
  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
                                                     RCP_LO);

  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                           NEG_RCP_LO, RCP_LO,
                                           ISD::SETEQ);
  // Calculate the rounding error from the URECIP instruction
  // E = mulhu(ABS_RCP_LO, RCP)
  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);

  // RCP_A_E = RCP + E
  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);

  // RCP_S_E = RCP - E
  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);

  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
                                     RCP_A_E, RCP_S_E,
                                     ISD::SETEQ);
  // Quotient = mulhu(Tmp0, Num)
  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);

  // Num_S_Remainder = Quotient * Den
  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);

  // Remainder = Num - Num_S_Remainder
  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);

  // The two flags below decide whether the candidate quotient is one too
  // small (remainder still >= Den) or one too large (remainder went
  // "negative", i.e. wrapped).

  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
                                                 DAG.getConstant(-1, VT),
                                                 DAG.getConstant(0, VT),
                                                 ISD::SETGE);
  // Remainder_GE_Zero = (Remainder >= 0 ? -1 : 0)
  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Remainder,
                                                  DAG.getConstant(0, VT),
                                                  DAG.getConstant(-1, VT),
                                                  DAG.getConstant(0, VT),
                                                  ISD::SETGE);
  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
                                               Remainder_GE_Zero);

  // Calculate Division result:

  // Quotient_A_One = Quotient + 1
  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
                                       DAG.getConstant(1, VT));

  // Quotient_S_One = Quotient - 1
  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
                                       DAG.getConstant(1, VT));

  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                     Quotient, Quotient_A_One, ISD::SETEQ);

  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Quotient_S_One, Div, ISD::SETEQ);

  // Calculate Rem result:

  // Remainder_S_Den = Remainder - Den
  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);

  // Remainder_A_Den = Remainder + Den
  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);

  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
                                    Remainder, Remainder_S_Den, ISD::SETEQ);

  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
                            Remainder_A_Den, Rem, ISD::SETEQ);
  SDValue Ops[2];
  Ops[0] = Div;
  Ops[1] = Rem;
  return DAG.getMergeValues(Ops, 2, DL);
}
> +
>
> +//===----------------------------------------------------------------------===//
> +// Helper functions
>
> +//===----------------------------------------------------------------------===//
> +
> +bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
> +  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
> +    return CFP->isExactlyValue(1.0);
> +  }
> +  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
> +    return C->isAllOnesValue();
> +  }
> +  return false;
> +}
> +
> +bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
> +  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
> +    return CFP->getValueAPF().isZero();
> +  }
> +  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
> +    return C->isNullValue();
> +  }
> +  return false;
> +}
> +
> +SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
> +                                                  const
> TargetRegisterClass *RC,
> +                                                   unsigned Reg, EVT VT)
> const {
> +  MachineFunction &MF = DAG.getMachineFunction();
> +  MachineRegisterInfo &MRI = MF.getRegInfo();
> +  unsigned VirtualRegister;
> +  if (!MRI.isLiveIn(Reg)) {
> +    VirtualRegister = MRI.createVirtualRegister(RC);
> +    MRI.addLiveIn(Reg, VirtualRegister);
> +  } else {
> +    VirtualRegister = MRI.getLiveInVirtReg(Reg);
> +  }
> +  return DAG.getRegister(VirtualRegister, VT);
> +}
> +
> +#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
> +
> +const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode)
> const {
> +  switch (Opcode) {
> +  default: return 0;
> +  // AMDIL DAG nodes
> +  NODE_NAME_CASE(MAD);
> +  NODE_NAME_CASE(CALL);
> +  NODE_NAME_CASE(UMUL);
> +  NODE_NAME_CASE(DIV_INF);
> +  NODE_NAME_CASE(RET_FLAG);
> +  NODE_NAME_CASE(BRANCH_COND);
> +
> +  // AMDGPU DAG nodes
> +  NODE_NAME_CASE(DWORDADDR)
> +  NODE_NAME_CASE(FRACT)
> +  NODE_NAME_CASE(FMAX)
> +  NODE_NAME_CASE(SMAX)
> +  NODE_NAME_CASE(UMAX)
> +  NODE_NAME_CASE(FMIN)
> +  NODE_NAME_CASE(SMIN)
> +  NODE_NAME_CASE(UMIN)
> +  NODE_NAME_CASE(URECIP)
> +  NODE_NAME_CASE(INTERP)
> +  NODE_NAME_CASE(INTERP_P0)
> +  NODE_NAME_CASE(EXPORT)
> +  }
> +}
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,144 @@
> +//===-- AMDGPUISelLowering.h - AMDGPU Lowering Interface --------*- C++
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Interface definition of the TargetLowering class that is common
> +/// to all AMD GPUs.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPUISELLOWERING_H
> +#define AMDGPUISELLOWERING_H
> +
> +#include "llvm/Target/TargetLowering.h"
> +
> +namespace llvm {
> +
> +class MachineRegisterInfo;
> +
/// \brief TargetLowering subclass shared by the R600 and SI backends.
class AMDGPUTargetLowering : public TargetLowering {
private:
  SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;

protected:

  /// \brief Helper function that adds Reg to the LiveIn list of the DAG's
  /// MachineFunction.
  ///
  /// \returns a RegisterSDNode representing Reg.
  SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
                                                  unsigned Reg, EVT VT) const;

  /// \brief True if \p Op is the constant the hardware treats as boolean
  /// true (1.0f / all-ones int); see the .cpp implementation.
  bool isHWTrueValue(SDValue Op) const;
  /// \brief True if \p Op is the constant the hardware treats as boolean
  /// false (0.0f / zero int).
  bool isHWFalseValue(SDValue Op) const;

public:
  AMDGPUTargetLowering(TargetMachine &TM);

  virtual SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv,
                             bool isVarArg,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc DL, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const;

  virtual SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool isVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              DebugLoc DL, SelectionDAG &DAG) const;

  virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIntrinsicIABS(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerIntrinsicLRP(SDValue Op, SelectionDAG &DAG) const;
  /// \brief Turn an eligible f32 select_cc into an FMIN/FMAX node; returns
  /// an empty SDValue when the pattern does not match.
  SDValue LowerMinMax(SDValue Op, SelectionDAG &DAG) const;
  virtual const char* getTargetNodeName(unsigned Opcode) const;

// Functions defined in AMDILISelLowering.cpp
public:

  /// \brief Determine which of the bits specified in \p Mask are known to be
  /// either zero or one and return them in the \p KnownZero and \p KnownOne
  /// bitsets.
  virtual void computeMaskedBitsForTargetNode(const SDValue Op,
                                              APInt &KnownZero,
                                              APInt &KnownOne,
                                              const SelectionDAG &DAG,
                                              unsigned Depth = 0) const;

  virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                                  const CallInst &I, unsigned Intrinsic) const;

  /// We want to mark f32/f64 floating point values as legal.
  bool isFPImmLegal(const APFloat &Imm, EVT VT) const;

  /// We don't want to shrink f64/f32 constants.
  bool ShouldShrinkFPConstant(EVT VT) const;

private:
  // AMDIL-inherited lowering helpers (implemented in AMDILISelLowering.cpp).
  void InitAMDILLowering();
  SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
  EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const;
  SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
  SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
};
> +
namespace AMDGPUISD {

// Target-specific SelectionDAG opcodes, numbered after the last generic
// ISD opcode.
enum {
  // AMDIL ISD Opcodes
  FIRST_NUMBER = ISD::BUILTIN_OP_END,
  MAD,         // 32bit Fused Multiply Add instruction
  CALL,        // Function call based on a single integer
  UMUL,        // 32bit unsigned multiplication
  DIV_INF,      // Divide with infinity returned on zero divisor
  RET_FLAG,
  BRANCH_COND,
  // End AMDIL ISD Opcodes
  BITALIGN,
  DWORDADDR,
  FRACT,       // Fractional part (see AMDIL_fraction lowering)
  // min/max families: F = float, S = signed int, U = unsigned int.
  FMAX,
  SMAX,
  UMAX,
  FMIN,
  SMIN,
  UMIN,
  URECIP,      // Approximate unsigned reciprocal: 2^32 / x + error
  INTERP,
  INTERP_P0,
  EXPORT,
  LAST_AMDGPU_ISD_NUMBER  // Sentinel; SI-specific opcodes start here.
};


} // End namespace AMDGPUISD
> +
namespace SIISD {

// SI-only SelectionDAG opcodes, numbered after the shared AMDGPU set.
enum {
  SI_FIRST = AMDGPUISD::LAST_AMDGPU_ISD_NUMBER,
  VCC_AND,
  VCC_BITCAST
};

} // End namespace SIISD
> +
> +} // End namespace llvm
> +
> +#endif // AMDGPUISELLOWERING_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.cpp Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,257 @@
> +//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo
> ------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Implementation of the TargetInstrInfo class that is common to
> all
> +/// AMD GPUs.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPUInstrInfo.h"
> +#include "AMDGPURegisterInfo.h"
> +#include "AMDGPUTargetMachine.h"
> +#include "AMDIL.h"
> +#include "llvm/CodeGen/MachineFrameInfo.h"
> +#include "llvm/CodeGen/MachineInstrBuilder.h"
> +#include "llvm/CodeGen/MachineRegisterInfo.h"
> +
> +#define GET_INSTRINFO_CTOR
> +#include "AMDGPUGenInstrInfo.inc"
> +
> +using namespace llvm;
> +
// Construct the shared AMD GPU instruction info; the TableGen'd base gets
// no call-frame setup/destroy opcodes (0, 0).
AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm)
  : AMDGPUGenInstrInfo(0,0), RI(tm, *this), TM(tm) { }
> +
// Accessor for the register-info object owned by this instruction info.
const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
  return RI;
}
> +
// Stub: coalescable-extension recognition is not implemented for AMD GPUs.
bool AMDGPUInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
                                           unsigned &SrcReg, unsigned &DstReg,
                                           unsigned &SubIdx) const {
// TODO: Implement this function
  return false;
}
> +
// Stub: stack-slot load recognition not implemented (0 = not a slot load).
unsigned AMDGPUInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                             int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}
> +
// Stub: post-frame-elimination variant; see isLoadFromStackSlot.
unsigned AMDGPUInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                   int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}
> +
// Stub: never reports a stack-slot load.
bool AMDGPUInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                          const MachineMemOperand *&MMO,
                                          int &FrameIndex) const {
// TODO: Implement this function
  return false;
}
// Stub.  NOTE(review): the corresponding TargetInstrInfo hook is named
// isStoreToStackSlot -- confirm this spelling is intentional; as written
// this will not act as an override of the base-class hook.
unsigned AMDGPUInstrInfo::isStoreFromStackSlot(const MachineInstr *MI,
                                              int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}
// Stub: post-frame-elimination variant; see isStoreFromStackSlot.
unsigned AMDGPUInstrInfo::isStoreFromStackSlotPostFE(const MachineInstr *MI,
                                                    int &FrameIndex) const {
// TODO: Implement this function
  return 0;
}
// Stub: never reports a stack-slot store.
bool AMDGPUInstrInfo::hasStoreFromStackSlot(const MachineInstr *MI,
                                           const MachineMemOperand *&MMO,
                                           int &FrameIndex) const {
// TODO: Implement this function
  return false;
}
> +
// Stub: two-address to three-address conversion is not supported (NULL
// tells the pass to leave the instruction alone).
MachineInstr *
AMDGPUInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                      MachineBasicBlock::iterator &MBBI,
                                      LiveVariables *LV) const {
// TODO: Implement this function
  return NULL;
}
> +bool AMDGPUInstrInfo::getNextBranchInstr(MachineBasicBlock::iterator
> &iter,
> +                                        MachineBasicBlock &MBB) const {
> +  while (iter != MBB.end()) {
> +    switch (iter->getOpcode()) {
> +    default:
> +      break;
> +    case AMDGPU::BRANCH_COND_i32:
> +    case AMDGPU::BRANCH_COND_f32:
> +    case AMDGPU::BRANCH:
> +      return true;
> +    };
> +    ++iter;
> +  }
> +  return false;
> +}
> +
// Walk backwards from the end of \p MBB over trailing flow-control
// instructions (ENDLOOP / ENDIF / ELSE) and return the position where a new
// instruction should be inserted so it lands before them.
// NOTE(review): this helper is not declared in a header; it should likely
// be 'static' for internal linkage -- confirm there are no external users.
MachineBasicBlock::iterator skipFlowControl(MachineBasicBlock *MBB) {
  MachineBasicBlock::iterator tmp = MBB->end();
  if (!MBB->size()) {
    return MBB->end();
  }
  // The loop condition relies on the iterator's implicit conversion to a
  // MachineInstr pointer to stop when it runs off the front of the list.
  while (--tmp) {
    if (tmp->getOpcode() == AMDGPU::ENDLOOP
        || tmp->getOpcode() == AMDGPU::ENDIF
        || tmp->getOpcode() == AMDGPU::ELSE) {
      if (tmp == MBB->begin()) {
        // Whole block is flow control: insert at the very start.
        return tmp;
      } else {
        continue;
      }
    }  else {
      // First non-flow-control instruction: insert just after it.
      return ++tmp;
    }
  }
  return MBB->end();
}
> +
// Register spilling to stack slots is not supported on AMD GPUs; reaching
// this is a bug in the caller.
void
AMDGPUInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MI,
                                    unsigned SrcReg, bool isKill,
                                    int FrameIndex,
                                    const TargetRegisterClass *RC,
                                    const TargetRegisterInfo *TRI) const {
  assert(!"Not Implemented");
}
> +
// Register reloading from stack slots is not supported on AMD GPUs;
// reaching this is a bug in the caller.
void
AMDGPUInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     unsigned DestReg, int FrameIndex,
                                     const TargetRegisterClass *RC,
                                     const TargetRegisterInfo *TRI) const {
  assert(!"Not Implemented");
}
> +
// Stub: memory-operand folding (frame-index form) is not implemented;
// returning 0 means "no folded instruction produced".
MachineInstr *
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      int FrameIndex) const {
// TODO: Implement this function
  return 0;
}
// Stub: memory-operand folding (load-instruction form) is not implemented.
MachineInstr*
AMDGPUInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                      MachineInstr *MI,
                                      const SmallVectorImpl<unsigned> &Ops,
                                      MachineInstr *LoadMI) const {
  // TODO: Implement this function
  return 0;
}
// Stub: memory operands can never be folded (matches the folding stubs
// above, which always decline).
bool
AMDGPUInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
                                     const SmallVectorImpl<unsigned> &Ops) const {
  // TODO: Implement this function
  return false;
}
// Stub: unfolding folded memory operands (MachineInstr form) is not
// implemented.
bool
AMDGPUInstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
                                 unsigned Reg, bool UnfoldLoad,
                                 bool UnfoldStore,
                                 SmallVectorImpl<MachineInstr*> &NewMIs) const {
  // TODO: Implement this function
  return false;
}
> +
// Stub: unfolding folded memory operands (SelectionDAG form) is not
// implemented.
bool
AMDGPUInstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
                                    SmallVectorImpl<SDNode*> &NewNodes) const {
  // TODO: Implement this function
  return false;
}
> +
// Stub: no unfolded-opcode mapping exists (consistent with the unfold
// stubs above).
unsigned
AMDGPUInstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                           bool UnfoldLoad, bool UnfoldStore,
                                           unsigned *LoadRegIndex) const {
  // TODO: Implement this function
  return 0;
}
> +
> +bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode
> *Load2,
> +                                             int64_t Offset1, int64_t
> Offset2,
> +                                             unsigned NumLoads) const {
> +  assert(Offset2 > Offset1
> +         && "Second offset should be larger than first offset!");
> +  // If we have less than 16 loads in a row, and the offsets are within
> 16,
> +  // then schedule together.
> +  // TODO: Make the loads schedule near if it fits in a cacheline
> +  return (NumLoads < 16 && (Offset2 - Offset1) < 16);
> +}
> +
// Stub: returning true means "condition could not be reversed".
bool
AMDGPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
  const {
  // TODO: Implement this function
  return true;
}
// Stub: no noop is inserted.
void AMDGPUInstrInfo::insertNoop(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI) const {
  // TODO: Implement this function
}
> +
// Stub: no instruction is considered predicated yet.
bool AMDGPUInstrInfo::isPredicated(const MachineInstr *MI) const {
  // TODO: Implement this function
  return false;
}
// Stub: no predicate is considered to subsume another.
bool
AMDGPUInstrInfo::SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
                                  const SmallVectorImpl<MachineOperand> &Pred2)
  const {
  // TODO: Implement this function
  return false;
}
> +
// Stub: no instruction is reported as defining a predicate.
bool AMDGPUInstrInfo::DefinesPredicate(MachineInstr *MI,
                                      std::vector<MachineOperand> &Pred) const {
  // TODO: Implement this function
  return false;
}
> +
// Defer to the instruction description's predicability flag.
bool AMDGPUInstrInfo::isPredicable(MachineInstr *MI) const {
  // TODO: Implement this function
  return MI->getDesc().isPredicable();
}
> +
// Stub: conservatively allow moving defs of any register class.
bool
AMDGPUInstrInfo::isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // TODO: Implement this function
  return true;
}
> +
> +void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF,
> +    DebugLoc DL) const {
> +  MachineRegisterInfo &MRI = MF.getRegInfo();
> +  const AMDGPURegisterInfo & RI = getRegisterInfo();
> +
> +  for (unsigned i = 0; i < MI.getNumOperands(); i++) {
> +    MachineOperand &MO = MI.getOperand(i);
> +    // Convert dst regclass to one that is supported by the ISA
> +    if (MO.isReg() && MO.isDef()) {
> +      if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
> +        const TargetRegisterClass * oldRegClass =
> MRI.getRegClass(MO.getReg());
> +        const TargetRegisterClass * newRegClass =
> RI.getISARegClass(oldRegClass);
> +
> +        assert(newRegClass);
> +
> +        MRI.setRegClass(MO.getReg(), newRegClass);
> +      }
> +    }
> +  }
> +}
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,149 @@
> +//===-- AMDGPUInstrInfo.h - AMDGPU Instruction Information ------*- C++
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Contains the definition of a TargetInstrInfo class that is
> common
> +/// to all AMD GPUs.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPUINSTRUCTIONINFO_H
> +#define AMDGPUINSTRUCTIONINFO_H
> +
> +#include "AMDGPURegisterInfo.h"
> +#include "AMDGPUInstrInfo.h"
> +#include "llvm/Target/TargetInstrInfo.h"
> +
> +#include <map>
> +
> +#define GET_INSTRINFO_HEADER
> +#define GET_INSTRINFO_ENUM
> +#include "AMDGPUGenInstrInfo.inc"
> +
> +#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
> +#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
> +#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
> +#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
> +
> +namespace llvm {
> +
> +class AMDGPUTargetMachine;
> +class MachineFunction;
> +class MachineInstr;
> +class MachineInstrBuilder;
> +
> +class AMDGPUInstrInfo : public AMDGPUGenInstrInfo {
> +private:
> +  const AMDGPURegisterInfo RI;
> +  TargetMachine &TM;
> +  bool getNextBranchInstr(MachineBasicBlock::iterator &iter,
> +                          MachineBasicBlock &MBB) const;
> +public:
> +  explicit AMDGPUInstrInfo(TargetMachine &tm);
> +
> +  virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0;
> +
> +  bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg,
> +                             unsigned &DstReg, unsigned &SubIdx) const;
> +
> +  unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex)
> const;
> +  unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
> +                                     int &FrameIndex) const;
> +  bool hasLoadFromStackSlot(const MachineInstr *MI,
> +                            const MachineMemOperand *&MMO,
> +                            int &FrameIndex) const;
> +  unsigned isStoreFromStackSlot(const MachineInstr *MI, int &FrameIndex)
> const;
> +  unsigned isStoreFromStackSlotPostFE(const MachineInstr *MI,
> +                                      int &FrameIndex) const;
> +  bool hasStoreFromStackSlot(const MachineInstr *MI,
> +                             const MachineMemOperand *&MMO,
> +                             int &FrameIndex) const;
> +
> +  MachineInstr *
> +  convertToThreeAddress(MachineFunction::iterator &MFI,
> +                        MachineBasicBlock::iterator &MBBI,
> +                        LiveVariables *LV) const;
> +
> +
> +  virtual void copyPhysReg(MachineBasicBlock &MBB,
> +                           MachineBasicBlock::iterator MI, DebugLoc DL,
> +                           unsigned DestReg, unsigned SrcReg,
> +                           bool KillSrc) const = 0;
> +
> +  void storeRegToStackSlot(MachineBasicBlock &MBB,
> +                           MachineBasicBlock::iterator MI,
> +                           unsigned SrcReg, bool isKill, int FrameIndex,
> +                           const TargetRegisterClass *RC,
> +                           const TargetRegisterInfo *TRI) const;
> +  void loadRegFromStackSlot(MachineBasicBlock &MBB,
> +                            MachineBasicBlock::iterator MI,
> +                            unsigned DestReg, int FrameIndex,
> +                            const TargetRegisterClass *RC,
> +                            const TargetRegisterInfo *TRI) const;
> +
> +protected:
> +  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
> +                                      MachineInstr *MI,
> +                                      const SmallVectorImpl<unsigned>
> &Ops,
> +                                      int FrameIndex) const;
> +  MachineInstr *foldMemoryOperandImpl(MachineFunction &MF,
> +                                      MachineInstr *MI,
> +                                      const SmallVectorImpl<unsigned>
> &Ops,
> +                                      MachineInstr *LoadMI) const;
> +public:
> +  bool canFoldMemoryOperand(const MachineInstr *MI,
> +                            const SmallVectorImpl<unsigned> &Ops) const;
> +  bool unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
> +                           unsigned Reg, bool UnfoldLoad, bool
> UnfoldStore,
> +                           SmallVectorImpl<MachineInstr *> &NewMIs) const;
> +  bool unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
> +                           SmallVectorImpl<SDNode *> &NewNodes) const;
> +  unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
> +                                      bool UnfoldLoad, bool UnfoldStore,
> +                                      unsigned *LoadRegIndex = 0) const;
> +  bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
> +                               int64_t Offset1, int64_t Offset2,
> +                               unsigned NumLoads) const;
> +
> +  bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
> const;
> +  void insertNoop(MachineBasicBlock &MBB,
> +                  MachineBasicBlock::iterator MI) const;
> +  bool isPredicated(const MachineInstr *MI) const;
> +  bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
> +                         const SmallVectorImpl<MachineOperand> &Pred2)
> const;
> +  bool DefinesPredicate(MachineInstr *MI,
> +                        std::vector<MachineOperand> &Pred) const;
> +  bool isPredicable(MachineInstr *MI) const;
> +  bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const;
> +
> +  // Helper functions that check the opcode for status information
> +  bool isLoadInst(llvm::MachineInstr *MI) const;
> +  bool isExtLoadInst(llvm::MachineInstr *MI) const;
> +  bool isSWSExtLoadInst(llvm::MachineInstr *MI) const;
> +  bool isSExtLoadInst(llvm::MachineInstr *MI) const;
> +  bool isZExtLoadInst(llvm::MachineInstr *MI) const;
> +  bool isAExtLoadInst(llvm::MachineInstr *MI) const;
> +  bool isStoreInst(llvm::MachineInstr *MI) const;
> +  bool isTruncStoreInst(llvm::MachineInstr *MI) const;
> +
> +  virtual MachineInstr* getMovImmInstr(MachineFunction *MF, unsigned
> DstReg,
> +                                       int64_t Imm) const = 0;
> +  virtual unsigned getIEQOpcode() const = 0;
> +  virtual bool isMov(unsigned opcode) const = 0;
> +
> +  /// \brief Convert the AMDIL MachineInstr to a supported ISA
> +  /// MachineInstr
> +  virtual void convertToISA(MachineInstr & MI, MachineFunction &MF,
> +    DebugLoc DL) const;
> +
> +};
> +
> +} // End llvm namespace
> +
> +#endif // AMDGPUINSTRINFO_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUInstrInfo.td Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,74 @@
> +//===-- AMDGPUInstrInfo.td - AMDGPU DAG nodes --------------*- tablegen
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains DAG node definitions for the AMDGPU target.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
>
> +//===----------------------------------------------------------------------===//
> +// AMDGPU DAG Profiles
>
> +//===----------------------------------------------------------------------===//
> +
> +def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
> +  SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3>
> +]>;
> +
>
> +//===----------------------------------------------------------------------===//
> +// AMDGPU DAG Nodes
> +//
> +
> +// out = ((a << 32) | b) >> c)
> +//
> +// Can be used to optimize rotl:
> +// rotl(a, b) = bitalign(a, a, 32 - b)
> +def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
> +
> +// The argument to this node is a dword address.
> +def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
> +
> +// out = a - floor(a)
> +def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>;
> +
> +// out = max(a, b) a and b are floats
> +def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp,
> +  [SDNPCommutative, SDNPAssociative]
> +>;
> +
> +// out = max(a, b) a and b are signed ints
> +def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp,
> +  [SDNPCommutative, SDNPAssociative]
> +>;
> +
> +// out = max(a, b) a and b are unsigned ints
> +def AMDGPUumax : SDNode<"AMDGPUISD::UMAX", SDTIntBinOp,
> +  [SDNPCommutative, SDNPAssociative]
> +>;
> +
> +// out = min(a, b) a and b are floats
> +def AMDGPUfmin : SDNode<"AMDGPUISD::FMIN", SDTFPBinOp,
> +  [SDNPCommutative, SDNPAssociative]
> +>;
> +
> +// out = min(a, b) a and b are signed ints
> +def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
> +  [SDNPCommutative, SDNPAssociative]
> +>;
> +
> +// out = min(a, b) a and b are unsigned ints
> +def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
> +  [SDNPCommutative, SDNPAssociative]
> +>;
> +
> +// urecip - This operation is a helper for integer division, it returns
> the
> +// result of 1 / a as a fractional unsigned integer.
> +// out = (2^32 / a) + e
> +// e is rounding error
> +def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>;
> +
> +def fpow : SDNode<"ISD::FPOW", SDTFPBinOp>;
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUInstructions.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUInstructions.td?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUInstructions.td (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUInstructions.td Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,190 @@
> +//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +// This file contains instruction defs that are common to all hw codegen
> +// targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +class AMDGPUInst <dag outs, dag ins, string asm, list<dag> pattern> :
> Instruction {
> +  field bits<16> AMDILOp = 0;
> +  field bits<3> Gen = 0;
> +
> +  let Namespace = "AMDGPU";
> +  let OutOperandList = outs;
> +  let InOperandList = ins;
> +  let AsmString = asm;
> +  let Pattern = pattern;
> +  let Itinerary = NullALU;
> +  let TSFlags{42-40} = Gen;
> +  let TSFlags{63-48} = AMDILOp;
> +}
> +
> +class AMDGPUShaderInst <dag outs, dag ins, string asm, list<dag> pattern>
> +    : AMDGPUInst<outs, ins, asm, pattern> {
> +
> +  field bits<32> Inst = 0xffffffff;
> +
> +}
> +
> +def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
> +
> +def COND_EQ : PatLeaf <
> +  (cond),
> +  [{switch(N->get()){{default: return false;
> +                     case ISD::SETOEQ: case ISD::SETUEQ:
> +                     case ISD::SETEQ: return true;}}}]
> +>;
> +
> +def COND_NE : PatLeaf <
> +  (cond),
> +  [{switch(N->get()){{default: return false;
> +                     case ISD::SETONE: case ISD::SETUNE:
> +                     case ISD::SETNE: return true;}}}]
> +>;
> +def COND_GT : PatLeaf <
> +  (cond),
> +  [{switch(N->get()){{default: return false;
> +                     case ISD::SETOGT: case ISD::SETUGT:
> +                     case ISD::SETGT: return true;}}}]
> +>;
> +
> +def COND_GE : PatLeaf <
> +  (cond),
> +  [{switch(N->get()){{default: return false;
> +                     case ISD::SETOGE: case ISD::SETUGE:
> +                     case ISD::SETGE: return true;}}}]
> +>;
> +
> +def COND_LT : PatLeaf <
> +  (cond),
> +  [{switch(N->get()){{default: return false;
> +                     case ISD::SETOLT: case ISD::SETULT:
> +                     case ISD::SETLT: return true;}}}]
> +>;
> +
> +def COND_LE : PatLeaf <
> +  (cond),
> +  [{switch(N->get()){{default: return false;
> +                     case ISD::SETOLE: case ISD::SETULE:
> +                     case ISD::SETLE: return true;}}}]
> +>;
> +
>
> +//===----------------------------------------------------------------------===//
> +// Load/Store Pattern Fragments
>
> +//===----------------------------------------------------------------------===//
> +
> +def zextloadi8_global : PatFrag<(ops node:$ptr), (zextloadi8 node:$ptr),
> [{
> +    return isGlobalLoad(dyn_cast<LoadSDNode>(N));
> +}]>;
> +
> +class Constants {
> +int TWO_PI = 0x40c90fdb;
> +int PI = 0x40490fdb;
> +int TWO_PI_INV = 0x3e22f983;
> +}
> +def CONST : Constants;
> +
> +def FP_ZERO : PatLeaf <
> +  (fpimm),
> +  [{return N->getValueAPF().isZero();}]
> +>;
> +
> +def FP_ONE : PatLeaf <
> +  (fpimm),
> +  [{return N->isExactlyValue(1.0);}]
> +>;
> +
> +let isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1  in {
> +
> +class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
> +  (outs rc:$dst),
> +  (ins rc:$src0),
> +  "CLAMP $dst, $src0",
> +  [(set rc:$dst, (int_AMDIL_clamp rc:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
> +>;
> +
> +class FABS <RegisterClass rc> : AMDGPUShaderInst <
> +  (outs rc:$dst),
> +  (ins rc:$src0),
> +  "FABS $dst, $src0",
> +  [(set rc:$dst, (fabs rc:$src0))]
> +>;
> +
> +class FNEG <RegisterClass rc> : AMDGPUShaderInst <
> +  (outs rc:$dst),
> +  (ins rc:$src0),
> +  "FNEG $dst, $src0",
> +  [(set rc:$dst, (fneg rc:$src0))]
> +>;
> +
> +def SHADER_TYPE : AMDGPUShaderInst <
> +  (outs),
> +  (ins i32imm:$type),
> +  "SHADER_TYPE $type",
> +  [(int_AMDGPU_shader_type imm:$type)]
> +>;
> +
> +} // End isCodeGenOnly = 1, isPseudo = 1, usesCustomInserter = 1
> +
> +/* Generic helper patterns for intrinsics */
> +/* -------------------------------------- */
> +
> +class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst
> mul,
> +                  RegisterClass rc> : Pat <
> +  (fpow rc:$src0, rc:$src1),
> +  (exp_ieee (mul rc:$src1, (log_ieee rc:$src0)))
> +>;
> +
> +/* Other helper patterns */
> +/* --------------------- */
> +
> +/* Extract element pattern */
> +class Extract_Element <ValueType sub_type, ValueType vec_type,
> +                     RegisterClass vec_class, int sub_idx,
> +                     SubRegIndex sub_reg>: Pat<
> +  (sub_type (vector_extract (vec_type vec_class:$src), sub_idx)),
> +  (EXTRACT_SUBREG vec_class:$src, sub_reg)
> +>;
> +
> +/* Insert element pattern */
> +class Insert_Element <ValueType elem_type, ValueType vec_type,
> +                      RegisterClass elem_class, RegisterClass vec_class,
> +                      int sub_idx, SubRegIndex sub_reg> : Pat <
> +
> +  (vec_type (vector_insert (vec_type vec_class:$vec),
> +                           (elem_type elem_class:$elem), sub_idx)),
> +  (INSERT_SUBREG vec_class:$vec, elem_class:$elem, sub_reg)
> +>;
> +
> +// Vector Build pattern
> +class Vector_Build <ValueType vecType, RegisterClass vectorClass,
> +                    ValueType elemType, RegisterClass elemClass> : Pat <
> +  (vecType (build_vector (elemType elemClass:$x), (elemType elemClass:$y),
> +                         (elemType elemClass:$z), (elemType
> elemClass:$w))),
> +  (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG (INSERT_SUBREG
> +  (vecType (IMPLICIT_DEF)), elemClass:$x, sel_x), elemClass:$y, sel_y),
> +                            elemClass:$z, sel_z), elemClass:$w, sel_w)
> +>;
> +
> +// bitconvert pattern
> +class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
> +  (dt (bitconvert (st rc:$src0))),
> +  (dt rc:$src0)
> +>;
> +
> +class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat <
> +  (vt (AMDGPUdwordaddr (vt rc:$addr))),
> +  (vt rc:$addr)
> +>;
> +
> +include "R600Instructions.td"
> +
> +include "SIInstrInfo.td"
> +
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUIntrinsics.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUIntrinsics.td?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUIntrinsics.td (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUIntrinsics.td Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,62 @@
> +//===-- AMDGPUIntrinsics.td - Common intrinsics  -*- tablegen
> -*-----------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +// This file defines intrinsics that are used by all hw codegen targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +let TargetPrefix = "AMDGPU", isTarget = 1 in {
> +
> +  def int_AMDGPU_load_const : Intrinsic<[llvm_float_ty], [llvm_i32_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_load_imm : Intrinsic<[llvm_v4f32_ty], [llvm_i32_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty,
> llvm_i32_ty], []>;
> +  def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty], [IntrNoMem]>;
> +
> +  def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty,
> llvm_v4f32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
> +  def int_AMDGPU_kilp : Intrinsic<[], [], []>;
> +  def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_sle : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_sne : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_mullit : Intrinsic<[llvm_v4f32_ty], [llvm_float_ty,
> llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
> +  def int_AMDGPU_tex : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_txb : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_txf : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty,
> llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_txq : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_txd : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_txl : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_trunc : Intrinsic<[llvm_float_ty], [llvm_float_ty],
> [IntrNoMem]>;
> +  def int_AMDGPU_ddx : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_ddy : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
> llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_imax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty,
> llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_imin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty,
> llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_umax : Intrinsic<[llvm_i32_ty], [llvm_i32_ty,
> llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_umin : Intrinsic<[llvm_i32_ty], [llvm_i32_ty,
> llvm_i32_ty], [IntrNoMem]>;
> +  def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
> [IntrNoMem]>;
> +
> +  def int_AMDGPU_shader_type : Intrinsic<[], [llvm_i32_ty], []>;
> +}
> +
> +let TargetPrefix = "TGSI", isTarget = 1 in {
> +
> +  def int_TGSI_lit_z : Intrinsic<[llvm_float_ty], [llvm_float_ty,
> llvm_float_ty, llvm_float_ty],[IntrNoMem]>;
> +}
> +
> +include "SIIntrinsics.td"
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.cpp Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,77 @@
> +//===- AMDGPUMCInstLower.cpp - Lower AMDGPU MachineInstr to an MCInst
> -----===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Code to lower AMDGPU MachineInstrs to their corresponding
> MCInst.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +
> +#include "AMDGPUMCInstLower.h"
> +#include "AMDGPUAsmPrinter.h"
> +#include "R600InstrInfo.h"
> +#include "llvm/CodeGen/MachineBasicBlock.h"
> +#include "llvm/CodeGen/MachineInstr.h"
> +#include "llvm/Constants.h"
> +#include "llvm/MC/MCInst.h"
> +#include "llvm/MC/MCStreamer.h"
> +#include "llvm/Support/ErrorHandling.h"
> +
> +using namespace llvm;
> +
> +AMDGPUMCInstLower::AMDGPUMCInstLower() { }
> +
> +void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI)
> const {
> +  OutMI.setOpcode(MI->getOpcode());
> +
> +  for (unsigned i = 0, e = MI->getNumExplicitOperands(); i != e; ++i) {
> +    const MachineOperand &MO = MI->getOperand(i);
> +
> +    MCOperand MCOp;
> +    switch (MO.getType()) {
> +    default:
> +      llvm_unreachable("unknown operand type");
> +    case MachineOperand::MO_FPImmediate: {
> +      const APFloat &FloatValue = MO.getFPImm()->getValueAPF();
> +      assert(&FloatValue.getSemantics() == &APFloat::IEEEsingle &&
> +             "Only floating point immediates are supported at the
> moment.");
> +      MCOp = MCOperand::CreateFPImm(FloatValue.convertToFloat());
> +      break;
> +    }
> +    case MachineOperand::MO_Immediate:
> +      MCOp = MCOperand::CreateImm(MO.getImm());
> +      break;
> +    case MachineOperand::MO_Register:
> +      MCOp = MCOperand::CreateReg(MO.getReg());
> +      break;
> +    }
> +    OutMI.addOperand(MCOp);
> +  }
> +}
> +
> +void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
> +  AMDGPUMCInstLower MCInstLowering;
> +
> +  if (MI->isBundle()) {
> +    const MachineBasicBlock *MBB = MI->getParent();
> +    MachineBasicBlock::const_instr_iterator I = MI;
> +    ++I;
> +    while (I != MBB->end() && I->isInsideBundle()) {
> +      MCInst MCBundleInst;
> +      const MachineInstr *BundledInst = I;
> +      MCInstLowering.lower(BundledInst, MCBundleInst);
> +      OutStreamer.EmitInstruction(MCBundleInst);
> +      ++I;
> +    }
> +  } else {
> +    MCInst TmpInst;
> +    MCInstLowering.lower(MI, TmpInst);
> +    OutStreamer.EmitInstruction(TmpInst);
> +  }
> +}
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUMCInstLower.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,31 @@
> +//===- AMDGPUMCInstLower.h MachineInstr Lowering Interface ------*- C++
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
> +/// \file
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPU_MCINSTLOWER_H
> +#define AMDGPU_MCINSTLOWER_H
> +
> +namespace llvm {
> +
> +class MCInst;
> +class MachineInstr;
> +
> +class AMDGPUMCInstLower {
> +
> +public:
> +  AMDGPUMCInstLower();
> +
> +  /// \brief Lower a MachineInstr to an MCInst
> +  void lower(const MachineInstr *MI, MCInst &OutMI) const;
> +
> +};
> +
> +} // End namespace llvm
> +
> +#endif //AMDGPU_MCINSTLOWER_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.cpp Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,51 @@
> +//===-- AMDGPURegisterInfo.cpp - AMDGPU Register Information
> -------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Parent TargetRegisterInfo class common to all hw codegen
> targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPURegisterInfo.h"
> +#include "AMDGPUTargetMachine.h"
> +
> +using namespace llvm;
> +
> +AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm,
> +    const TargetInstrInfo &tii)
> +: AMDGPUGenRegisterInfo(0),
> +  TM(tm),
> +  TII(tii)
> +  { }
> +
>
> +//===----------------------------------------------------------------------===//
> +// Function handling callbacks - Functions are a seldom used feature of
> GPUS, so
> +// they are not supported at this time.
>
> +//===----------------------------------------------------------------------===//
> +
> +const uint16_t AMDGPURegisterInfo::CalleeSavedReg = AMDGPU::NoRegister;
> +
> +const uint16_t* AMDGPURegisterInfo::getCalleeSavedRegs(const
> MachineFunction *MF)
> +
> const {
> +  return &CalleeSavedReg;
> +}
> +
> +void AMDGPURegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator
> MI,
> +                                             int SPAdj,
> +                                             RegScavenger *RS) const {
> +  assert(!"Subroutines not supported yet");
> +}
> +
> +unsigned AMDGPURegisterInfo::getFrameRegister(const MachineFunction &MF)
> const {
> +  assert(!"Subroutines not supported yet");
> +  return 0;
> +}
> +
> +#define GET_REGINFO_TARGET_DESC
> +#include "AMDGPUGenRegisterInfo.inc"
>
> Added: llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.h Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,63 @@
> +//===-- AMDGPURegisterInfo.h - AMDGPURegisterInfo Interface -*- C++
> -*-----===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief TargetRegisterInfo interface that is implemented by all hw
> codegen
> +/// targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPUREGISTERINFO_H
> +#define AMDGPUREGISTERINFO_H
> +
> +#include "llvm/ADT/BitVector.h"
> +#include "llvm/Target/TargetRegisterInfo.h"
> +
> +#define GET_REGINFO_HEADER
> +#define GET_REGINFO_ENUM
> +#include "AMDGPUGenRegisterInfo.inc"
> +
> +namespace llvm {
> +
> +class AMDGPUTargetMachine;
> +class TargetInstrInfo;
> +
> +struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo {
> +  TargetMachine &TM;
> +  const TargetInstrInfo &TII;
> +  static const uint16_t CalleeSavedReg;
> +
> +  AMDGPURegisterInfo(TargetMachine &tm, const TargetInstrInfo &tii);
> +
> +  virtual BitVector getReservedRegs(const MachineFunction &MF) const {
> +    assert(!"Unimplemented");  return BitVector();
> +  }
> +
> +  /// \param RC is an AMDIL reg class.
> +  ///
> +  /// \returns The ISA reg class that is equivalent to \p RC.
> +  virtual const TargetRegisterClass * getISARegClass(
> +                                         const TargetRegisterClass * RC)
> const {
> +    assert(!"Unimplemented"); return NULL;
> +  }
> +
> +  virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT)
> const {
> +    assert(!"Unimplemented"); return NULL;
> +  }
> +
> +  const uint16_t* getCalleeSavedRegs(const MachineFunction *MF) const;
> +  void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
> +                           RegScavenger *RS) const;
> +  unsigned getFrameRegister(const MachineFunction &MF) const;
> +
> +};
> +
> +} // End namespace llvm
> +
> +#endif // AMDGPUREGISTERINFO_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.td?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.td (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPURegisterInfo.td Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,22 @@
> +//===-- AMDGPURegisterInfo.td - AMDGPU register info -------*- tablegen
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +// Tablegen register definitions common to all hw codegen targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +let Namespace = "AMDGPU" in {
> +  def sel_x : SubRegIndex;
> +  def sel_y : SubRegIndex;
> +  def sel_z : SubRegIndex;
> +  def sel_w : SubRegIndex;
> +}
> +
> +include "R600RegisterInfo.td"
> +include "SIRegisterInfo.td"
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUSubtarget.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUSubtarget.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUSubtarget.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUSubtarget.cpp Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,87 @@
> +//===-- AMDGPUSubtarget.cpp - AMDGPU Subtarget Information
> ----------------===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief Implements the AMDGPU specific subclass of TargetSubtarget.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPUSubtarget.h"
> +
> +using namespace llvm;
> +
> +#define GET_SUBTARGETINFO_ENUM
> +#define GET_SUBTARGETINFO_TARGET_DESC
> +#define GET_SUBTARGETINFO_CTOR
> +#include "AMDGPUGenSubtargetInfo.inc"
> +
> +AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef
> FS) :
> +  AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) {
> +    InstrItins = getInstrItineraryForCPU(CPU);
> +
> +  memset(CapsOverride, 0, sizeof(*CapsOverride)
> +      * AMDGPUDeviceInfo::MaxNumberCapabilities);
> +  // Default card
> +  StringRef GPU = CPU;
> +  Is64bit = false;
> +  DefaultSize[0] = 64;
> +  DefaultSize[1] = 1;
> +  DefaultSize[2] = 1;
> +  ParseSubtargetFeatures(GPU, FS);
> +  DevName = GPU;
> +  Device = AMDGPUDeviceInfo::getDeviceFromName(DevName, this, Is64bit);
> +}
> +
> +AMDGPUSubtarget::~AMDGPUSubtarget() {
> +  delete Device;
> +}
> +
> +bool
> +AMDGPUSubtarget::isOverride(AMDGPUDeviceInfo::Caps caps) const {
> +  assert(caps < AMDGPUDeviceInfo::MaxNumberCapabilities &&
> +      "Caps index is out of bounds!");
> +  return CapsOverride[caps];
> +}
> +bool
> +AMDGPUSubtarget::is64bit() const  {
> +  return Is64bit;
> +}
> +bool
> +AMDGPUSubtarget::isTargetELF() const {
> +  return false;
> +}
> +size_t
> +AMDGPUSubtarget::getDefaultSize(uint32_t dim) const {
> +  if (dim > 3) {
> +    return 1;
> +  } else {
> +    return DefaultSize[dim];
> +  }
> +}
> +
> +std::string
> +AMDGPUSubtarget::getDataLayout() const {
> +    if (!Device) {
> +        return std::string("e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16"
> +                "-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:32:32"
> +                "-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64"
> +                "-v96:128:128-v128:128:128-v192:256:256-v256:256:256"
> +                "-v512:512:512-v1024:1024:1024-v2048:2048:2048-a0:0:64");
> +    }
> +    return Device->getDataLayout();
> +}
> +
> +std::string
> +AMDGPUSubtarget::getDeviceName() const {
> +  return DevName;
> +}
> +const AMDGPUDevice *
> +AMDGPUSubtarget::device() const {
> +  return Device;
> +}
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUSubtarget.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,65 @@
> +//=====-- AMDGPUSubtarget.h - Define Subtarget for the AMDIL ---*- C++
> -*-====//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//==-----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief AMDGPU specific subclass of TargetSubtarget.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPUSUBTARGET_H
> +#define AMDGPUSUBTARGET_H
> +#include "AMDILDevice.h"
> +#include "llvm/ADT/StringExtras.h"
> +#include "llvm/ADT/StringRef.h"
> +#include "llvm/Target/TargetSubtargetInfo.h"
> +
> +#define GET_SUBTARGETINFO_HEADER
> +#include "AMDGPUGenSubtargetInfo.inc"
> +
> +#define MAX_CB_SIZE (1 << 16)
> +
> +namespace llvm {
> +
/// \brief AMDGPU-specific subtarget: wraps the TableGen-generated
/// subtarget info together with the AMDGPUDevice object describing the
/// selected GPU's capabilities.
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
private:
  // Per-capability override flags (indexed by AMDGPUDeviceInfo::Caps).
  bool CapsOverride[AMDGPUDeviceInfo::MaxNumberCapabilities];
  // Device capability object; may be null before a device is selected
  // (getDataLayout() falls back to a default layout in that case).
  const AMDGPUDevice *Device;
  // Per-dimension defaults returned by getDefaultSize(); only indices
  // 0-2 are valid.  Presumably default ND-range sizes — TODO confirm.
  size_t DefaultSize[3];
  std::string DevName;  // Name of the selected GPU, see getDeviceName().
  bool Is64bit;         // True for a 64-bit target, see is64bit().
  bool Is32on64bit;     // NOTE(review): purpose not visible here — confirm.
  bool DumpCode;        // Dump generated code, see dumpCode().
  bool R600ALUInst;     // Use R600 ALU encoding, see r600ALUEncoding().

  InstrItineraryData InstrItins;

public:
  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS);
  virtual ~AMDGPUSubtarget();

  const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
  // Auto-generated by TableGen from the target's feature definitions.
  virtual void ParseSubtargetFeatures(llvm::StringRef CPU, llvm::StringRef FS);

  bool isOverride(AMDGPUDeviceInfo::Caps) const;
  bool is64bit() const;

  // Helper functions to simplify if statements
  bool isTargetELF() const;
  const AMDGPUDevice* device() const;
  std::string getDataLayout() const;
  std::string getDeviceName() const;
  virtual size_t getDefaultSize(uint32_t dim) const;
  bool dumpCode() const { return DumpCode; }
  bool r600ALUEncoding() const { return R600ALUInst; }

};
> +
> +} // End namespace llvm
> +
> +#endif // AMDGPUSUBTARGET_H
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.cpp Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,141 @@
> +//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen
> targets-----===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief The AMDGPU target machine contains all of the hardware specific
> +/// information needed to emit code for R600 and SI GPUs.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#include "AMDGPUTargetMachine.h"
> +#include "AMDGPU.h"
> +#include "R600ISelLowering.h"
> +#include "R600InstrInfo.h"
> +#include "SIISelLowering.h"
> +#include "SIInstrInfo.h"
> +#include "llvm/Analysis/Passes.h"
> +#include "llvm/Analysis/Verifier.h"
> +#include "llvm/CodeGen/MachineFunctionAnalysis.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
> +#include "llvm/CodeGen/Passes.h"
> +#include "llvm/MC/MCAsmInfo.h"
> +#include "llvm/PassManager.h"
> +#include "llvm/Support/TargetRegistry.h"
> +#include "llvm/Support/raw_os_ostream.h"
> +#include "llvm/Transforms/IPO.h"
> +#include "llvm/Transforms/Scalar.h"
> +#include <llvm/CodeGen/Passes.h>
> +
> +using namespace llvm;
> +
> +extern "C" void LLVMInitializeR600Target() {
> +  // Register the target
> +  RegisterTargetMachine<AMDGPUTargetMachine> X(TheAMDGPUTarget);
> +}
> +
> +AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT,
> +    StringRef CPU, StringRef FS,
> +  TargetOptions Options,
> +  Reloc::Model RM, CodeModel::Model CM,
> +  CodeGenOpt::Level OptLevel
> +)
> +:
> +  LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OptLevel),
> +  Subtarget(TT, CPU, FS),
> +  Layout(Subtarget.getDataLayout()),
> +  FrameLowering(TargetFrameLowering::StackGrowsUp,
> +      Subtarget.device()->getStackAlignment(), 0),
> +  IntrinsicInfo(this),
> +  InstrItins(&Subtarget.getInstrItineraryData()) {
> +  // TLInfo uses InstrInfo so it must be initialized after.
> +  if (Subtarget.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> +    InstrInfo = new R600InstrInfo(*this);
> +    TLInfo = new R600TargetLowering(*this);
> +  } else {
> +    InstrInfo = new SIInstrInfo(*this);
> +    TLInfo = new SITargetLowering(*this);
> +  }
> +}
> +
> +AMDGPUTargetMachine::~AMDGPUTargetMachine() {
> +}
> +
namespace {
/// \brief Pass configuration for AMDGPU code generation: decides which
/// machine passes run at each stage (see the add* hook definitions
/// below).
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(AMDGPUTargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  // Convenience downcast of the generic TargetMachine.
  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  // TargetPassConfig hooks, in pipeline order.
  virtual bool addPreISel();
  virtual bool addInstSelector();
  virtual bool addPreRegAlloc();
  virtual bool addPostRegAlloc();
  virtual bool addPreSched2();
  virtual bool addPreEmitPass();
};
} // End of anonymous namespace
> +
> +TargetPassConfig *AMDGPUTargetMachine::createPassConfig(PassManagerBase
> &PM) {
> +  return new AMDGPUPassConfig(this, PM);
> +}
> +
> +bool
> +AMDGPUPassConfig::addPreISel() {
> +  return false;
> +}
> +
> +bool AMDGPUPassConfig::addInstSelector() {
> +  addPass(createAMDGPUPeepholeOpt(*TM));
> +  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
> +  return false;
> +}
> +
> +bool AMDGPUPassConfig::addPreRegAlloc() {
> +  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> +
> +  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
> +    addPass(createSIAssignInterpRegsPass(*TM));
> +  }
> +  addPass(createAMDGPUConvertToISAPass(*TM));
> +  if (ST.device()->getGeneration() > AMDGPUDeviceInfo::HD6XXX) {
> +    addPass(createSIFixSGPRLivenessPass(*TM));
> +  }
> +  return false;
> +}
> +
> +bool AMDGPUPassConfig::addPostRegAlloc() {
> +  return false;
> +}
> +
> +bool AMDGPUPassConfig::addPreSched2() {
> +
> +  addPass(&IfConverterID);
> +  return false;
> +}
> +
> +bool AMDGPUPassConfig::addPreEmitPass() {
> +  addPass(createAMDGPUCFGPreparationPass(*TM));
> +  addPass(createAMDGPUCFGStructurizerPass(*TM));
> +
> +  const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> +  if (ST.device()->getGeneration() <= AMDGPUDeviceInfo::HD6XXX) {
> +    addPass(createR600ExpandSpecialInstrsPass(*TM));
> +    addPass(&FinalizeMachineBundlesID);
> +  } else {
> +    addPass(createSILowerLiteralConstantsPass(*TM));
> +    addPass(createSILowerControlFlowPass(*TM));
> +  }
> +
> +  return false;
> +}
> +
>
> Added: llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDGPUTargetMachine.h Tue Dec 11 15:25:42
> 2012
> @@ -0,0 +1,70 @@
> +//===-- AMDGPUTargetMachine.h - AMDGPU TargetMachine Interface --*- C++
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This file is distributed under the University of Illinois Open Source
> +// License. See LICENSE.TXT for details.
> +//
>
> +//===----------------------------------------------------------------------===//
> +//
> +/// \file
> +/// \brief The AMDGPU TargetMachine interface definition for hw codegen
> targets.
> +//
>
> +//===----------------------------------------------------------------------===//
> +
> +#ifndef AMDGPU_TARGET_MACHINE_H
> +#define AMDGPU_TARGET_MACHINE_H
> +
> +#include "AMDGPUInstrInfo.h"
> +#include "AMDGPUSubtarget.h"
> +#include "AMDILFrameLowering.h"
> +#include "AMDILIntrinsicInfo.h"
> +#include "R600ISelLowering.h"
> +#include "llvm/ADT/OwningPtr.h"
> +#include "llvm/DataLayout.h"
> +
> +namespace llvm {
> +
> +MCAsmInfo* createMCAsmInfo(const Target &T, StringRef TT);
> +
> +class AMDGPUTargetMachine : public LLVMTargetMachine {
> +
> +  AMDGPUSubtarget Subtarget;
> +  const DataLayout Layout;
> +  AMDGPUFrameLowering FrameLowering;
> +  AMDGPUIntrinsicInfo IntrinsicInfo;
> +  const AMDGPUInstrInfo * InstrInfo;
> +  AMDGPUTargetLowering * TLInfo;
> +  const InstrItineraryData* InstrItins;
> +
> +public:
> +   AMDGPUTargetMachine(const Target &T, StringRef TT, StringRef FS,
> +                       StringRef CPU,
> +                       TargetOptions Options,
> +                       Reloc::Model RM, CodeModel::Model CM,
> +                       CodeGenOpt::Level OL);
> +   ~AMDGPUTargetMachine();
> +   virtual const AMDGPUFrameLowering* getFrameLowering() const {
> +     return &FrameLowering;
> +   }
> +   virtual const AMDGPUIntrinsicInfo* getIntrinsicInfo() const {
> +     return &IntrinsicInfo;
> +   }
> +   virtual const AMDGPUInstrInfo *getInstrInfo() const {return InstrInfo;}
> +   virtual const AMDGPUSubtarget *getSubtargetImpl() const {return
> &Subtarget; }
> +   virtual const AMDGPURegisterInfo *getRegisterInfo() const {
> +      return &InstrInfo->getRegisterInfo();
> +   }
> +   virtual AMDGPUTargetLowering * getTargetLowering() const {
> +      return TLInfo;
> +   }
> +   virtual const InstrItineraryData* getInstrItineraryData() const {
> +      return InstrItins;
> +   }
> +   virtual const DataLayout* getDataLayout() const { return &Layout; }
> +   virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
> +};
> +
> +} // End namespace llvm
> +
> +#endif // AMDGPU_TARGET_MACHINE_H
>
> Added: llvm/trunk/lib/Target/R600/AMDIL.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDIL.h?rev=169915&view=auto
>
> ==============================================================================
> --- llvm/trunk/lib/Target/R600/AMDIL.h (added)
> +++ llvm/trunk/lib/Target/R600/AMDIL.h Tue Dec 11 15:25:42 2012
> @@ -0,0 +1,106 @@
> +//===-- AMDIL.h - Top-level interface for AMDIL representation --*- C++
> -*-===//
> +//
> +//                     The LLVM Compiler Infrastructure
> +//
> +// This ...
>
> [Message clipped]
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20121211/429aa327/attachment.html>


More information about the llvm-commits mailing list