R600 intrinsics patch

Tom Stellard tom at stellard.net
Tue Mar 17 11:10:22 PDT 2015


On Tue, Mar 17, 2015 at 11:11:56AM -0400, Tom Stellard wrote:
> On Tue, Mar 17, 2015 at 02:04:27AM -0300, Ronie Salgado wrote:
> > Hello,
> > 
> > This patch is to implement OpenCL 1.1 get_global_offset() in clover, and to
> > also be able to call amdgpu intrinsics from clang builtin function. The
> > discussion concerning this patch is here:
> > https://bugs.freedesktop.org/show_bug.cgi?id=86326
> > 
> > Greetings,
> > Ronie
> 
> Hi,
> 
> Thanks for the patch.  There are a lot of changes here and I think this
> should be broken up into several patches.
> 
> > diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
> > index 9deed41..f7eab7d 100644
> > --- a/include/llvm/IR/IntrinsicsNVVM.td
> > +++ b/include/llvm/IR/IntrinsicsNVVM.td
> > @@ -3693,6 +3693,21 @@ class PTXReadSpecialRegisterIntrinsic_r64<string name>
> >    : Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
> >      GCCBuiltin<name>;
> >  
> > +multiclass PTXReadSpecialParameterIntrinsic_v4i32<string prefix> {
> > +  def _x     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> > +               GCCBuiltin<!strconcat(prefix, "_x")>;
> > +  def _y     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> > +               GCCBuiltin<!strconcat(prefix, "_y")>;
> > +  def _z     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> > +               GCCBuiltin<!strconcat(prefix, "_z")>;
> > +  def _w     : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> > +               GCCBuiltin<!strconcat(prefix, "_w")>;
> > +}
> > +
> > +class PTXReadSpecialParameterIntrinsic_r32<string name>
> > +  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> > +    GCCBuiltin<name>;
> > +
> >  defm int_ptx_read_tid        : PTXReadSpecialRegisterIntrinsic_v4i32
> >                                 <"__builtin_ptx_read_tid">;
> >  defm int_ptx_read_ntid       : PTXReadSpecialRegisterIntrinsic_v4i32
> > @@ -3744,3 +3759,9 @@ def int_ptx_read_pm3         : PTXReadSpecialRegisterIntrinsic_r32
> >  
> >  def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
> >                         GCCBuiltin<"__builtin_ptx_bar_sync">;
> > +
> > +def int_ptx_read_workdim         : PTXReadSpecialParameterIntrinsic_r32
> > +                               <"__builtin_ptx_read_workdim">;
> > +
> > +defm int_ptx_read_global_offset       : PTXReadSpecialParameterIntrinsic_v4i32
> > +                               <"__builtin_ptx_read_global_offset">;
> 
> These NVPTX changes should be a separate patch.
> 
> > diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
> > index 5055667..6130c7d 100644
> > --- a/include/llvm/IR/IntrinsicsR600.td
> > +++ b/include/llvm/IR/IntrinsicsR600.td
> > @@ -11,37 +11,36 @@
> >  //
> >  //===----------------------------------------------------------------------===//
> >  
> > -let TargetPrefix = "r600" in {
> > +let TargetPrefix = "amdgpu" in {
> >  
> > -class R600ReadPreloadRegisterIntrinsic<string name>
> > +class AMDGPUReadPreloadRegisterIntrinsic<string name>
> >    : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> >      GCCBuiltin<name>;
> >  
> > -multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
> > -  def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
> > -  def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
> > -  def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
> > +multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz<string prefix> {
> > +  def _x : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
> > +  def _y : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
> > +  def _z : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
> >  }
> >  
> > -defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
> > -                                       "__builtin_r600_read_global_size">;
> > -defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
> > -                                       "__builtin_r600_read_local_size">;
> > -defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
> > -                                       "__builtin_r600_read_ngroups">;
> > -defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
> > -                                       "__builtin_r600_read_tgid">;
> > -defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
> > -                                       "__builtin_r600_read_tidig">;
> > -} // End TargetPrefix = "r600"
> > -
> > -let TargetPrefix = "AMDGPU" in {
> 
> The target prefix for all these has been changed from AMDGPU to amdgpu.
> I don't think this is necessary.  Matt may know more about this.
> 

Hi Ronie,

If you can still get it to work without renaming the target prefix, that
would be best.  We can fix the prefix in a later patch.  If you can't
get it working without changing the prefix, please split that change
into a separate patch.

Thanks,
Tom

> > +defm int_amdgpu_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz <
> > +                                       "__builtin_amdgpu_read_global_size">;
> > +defm int_amdgpu_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz <
> > +                                       "__builtin_amdgpu_read_local_size">;
> > +defm int_amdgpu_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz <
> > +                                       "__builtin_amdgpu_read_ngroups">;
> > +defm int_amdgpu_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz <
> > +                                       "__builtin_amdgpu_read_tgid">;
> > +defm int_amdgpu_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz <
> > +                                       "__builtin_amdgpu_read_tidig">;
> > +
> > +def int_amdgpu_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
> > +                                       "__builtin_amdgpu_read_workdim">;
> >  
> > -class AMDGPUReadPreloadRegisterIntrinsic<string name>
> > -  : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
> > -    GCCBuiltin<name>;
> > +defm int_amdgpu_read_global_offset : AMDGPUReadPreloadRegisterIntrinsic_xyz <
> > +                                       "__builtin_amdgpu_read_global_offset">;
> >  
> > -def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
> > +def int_amdgpu_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
> >    // 1st parameter: Numerator
> >    // 2nd parameter: Denominator
> >    // 3rd parameter: Constant to select select between first and
> > @@ -50,36 +49,33 @@ def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
> >              [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
> >              [IntrNoMem]>;
> >  
> > -def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
> > +def int_amdgpu_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
> >    Intrinsic<[llvm_anyfloat_ty],
> >              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
> >              [IntrNoMem]>;
> >  
> > -def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
> > +def int_amdgpu_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
> >    Intrinsic<[llvm_anyfloat_ty],
> >              [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
> >              [IntrNoMem]>;
> >  
> > -def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
> > +def int_amdgpu_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
> >    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
> >              [IntrNoMem]>;
> >  
> > -def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
> > +def int_amdgpu_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
> >    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> >  
> > -def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
> > +def int_amdgpu_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
> >    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> >  
> > -def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
> > +def int_amdgpu_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
> >    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
> >  
> > -def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
> > +def int_amdgpu_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
> >    Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
> >  
> > -def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
> > +def int_amdgpu_class : GCCBuiltin<"__builtin_amdgpu_class">,
> >    Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
> >  
> > -def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
> > -                                       "__builtin_amdgpu_read_workdim">;
> > -
> >  } // End TargetPrefix = "AMDGPU"
> > diff --git a/include/llvm/Support/KernelABI.h b/include/llvm/Support/KernelABI.h
> > new file mode 100644
> > index 0000000..39b73a2
> > --- /dev/null
> > +++ b/include/llvm/Support/KernelABI.h
> > @@ -0,0 +1,34 @@
> > +//===-- llvm/Support/Win64EH.h ---Win64 EH Constants-------------*- C++ -*-===//
> > +//
> > +//                     The LLVM Compiler Infrastructure
> > +//
> > +// This file is distributed under the University of Illinois Open Source
> > +// License. See LICENSE.TXT for details.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +//
> > +// This file contains constants used for implementing the interface between a
> > +// GPGPU kernel and the host CPU.
> > +//
> > +//===----------------------------------------------------------------------===//
> > +
> > +#ifndef LLVM_SUPPORT_KERNELABI_H
> > +#define LLVM_SUPPORT_KERNELABI_H
> > +
> > +namespace llvm {
> > +namespace KernelABI {
> > +
> > +namespace InputOffsets {
> > +
> > +/// Offsets in bytes after the kernel arguments.
> > +enum Offsets {
> > +    WORK_DIM = 0,
> > +    GLOBAL_OFFSET_X = 4,
> > +    GLOBAL_OFFSET_Y = 8,
> > +    GLOBAL_OFFSET_Z = 12,
> > +};
> 
> You can put this enum in SIInstrInfo.h with the KernelInputOffsets enum.
> 
> > +
> > +} // End of namespace InputOffset
> > +} // End of namespace KernelABI
> > +} // End of namespace llvm
> > +#endif
> > diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
> > index b09198e..6314918 100644
> > --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
> > +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
> > @@ -2063,6 +2063,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
> >    //     individually present in Ins.
> >    // So a different index should be used for indexing into Ins.
> >    // See similar issue in LowerCall.
> > +  // TODO: Support ABI
> 
> NVPTX is not really setup to use with clover, so I don't think we need to add this comment.
> 
> >    unsigned InsIdx = 0;
> >  
> >    int idx = 0;
> > diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
> > index 14e51aa..687c46d 100644
> > --- a/lib/Target/NVPTX/NVPTXIntrinsics.td
> > +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
> > @@ -7041,6 +7041,23 @@ def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
> >  def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
> >  def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
> >  
> > +
> > +// Special purpose parameter
> > +class PTX_READ_SPECIAL_PARAMETER_R32<string paramname, Intrinsic intop>
> > +  : NVPTXInst<(outs Int32Regs:$d), (ins),
> > +              !strconcat(!strconcat("ld.param.u32\t$d, [", paramname), "];"),
> > +              [(set Int32Regs:$d, (intop))]>;
> > +
> > +def PTX_READ_WORKDIM : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_workdim",
> > +                                                       int_ptx_read_workdim>;
> > +
> > +def PTX_READ_GLOBAL_OFFSET_X : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_global_offset_x",
> > +                                                       int_ptx_read_global_offset_x>;
> > +def PTX_READ_GLOBAL_OFFSET_Y : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_global_offset_y",
> > +                                                       int_ptx_read_global_offset_y>;
> > +def PTX_READ_GLOBAL_OFFSET_Z : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_global_offset_z",
> > +                                                       int_ptx_read_global_offset_z>;
> > +
> >  // PTX Parallel Synchronization and Communication Intrinsics
> > 
> 
> This should go in a different patch with the other NVPTX changes.
> 
> >  def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
> > diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> > index 4707279..cc9188d 100644
> > --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> > +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> > @@ -894,7 +894,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> >        return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
> >                           Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
> >  
> > -    case Intrinsic::AMDGPU_div_scale: {
> > +    case Intrinsic::amdgpu_div_scale: {
> >        // 3rd parameter required to be a constant.
> >        const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
> >        if (!Param)
> > @@ -916,29 +916,29 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> >                           Denominator, Numerator);
> >      }
> >  
> > -    case Intrinsic::AMDGPU_div_fmas:
> > +    case Intrinsic::amdgpu_div_fmas:
> >        return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
> >                           Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
> >                           Op.getOperand(4));
> >  
> > -    case Intrinsic::AMDGPU_div_fixup:
> > +    case Intrinsic::amdgpu_div_fixup:
> >        return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
> >                           Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
> >  
> > -    case Intrinsic::AMDGPU_trig_preop:
> > +    case Intrinsic::amdgpu_trig_preop:
> >        return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
> >                           Op.getOperand(1), Op.getOperand(2));
> >  
> > -    case Intrinsic::AMDGPU_rcp:
> > +    case Intrinsic::amdgpu_rcp:
> >        return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
> >  
> > -    case Intrinsic::AMDGPU_rsq:
> > +    case Intrinsic::amdgpu_rsq:
> >        return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
> >  
> >      case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
> >        return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
> >  
> > -    case Intrinsic::AMDGPU_rsq_clamped:
> > +    case Intrinsic::amdgpu_rsq_clamped:
> >        if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
> >          Type *Type = VT.getTypeForEVT(*DAG.getContext());
> >          APFloat Max = APFloat::getLargest(Type->getFltSemantics());
> > @@ -953,7 +953,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> >          return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
> >        }
> >  
> > -    case Intrinsic::AMDGPU_ldexp:
> > +    case Intrinsic::amdgpu_ldexp:
> >        return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
> >                                                     Op.getOperand(2));
> >  
> > @@ -1024,7 +1024,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> >      case AMDGPUIntrinsic::AMDGPU_brev:
> >        return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
> >  
> > -  case Intrinsic::AMDGPU_class:
> > +  case Intrinsic::amdgpu_class:
> >      return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
> >                         Op.getOperand(1), Op.getOperand(2));
> >  
> > diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
> > index d4f3145..6df1df9 100644
> > --- a/lib/Target/R600/R600ISelLowering.cpp
> > +++ b/lib/Target/R600/R600ISelLowering.cpp
> > @@ -27,6 +27,7 @@
> >  #include "llvm/CodeGen/SelectionDAG.h"
> >  #include "llvm/IR/Argument.h"
> >  #include "llvm/IR/Function.h"
> > +#include "llvm/Support/KernelABI.h"
> >  
> >  using namespace llvm;
> >  
> > @@ -794,47 +795,53 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
> >        return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
> >      }
> >  
> > -    case Intrinsic::r600_read_ngroups_x:
> > +    case Intrinsic::amdgpu_read_ngroups_x:
> >        return LowerImplicitParameter(DAG, VT, DL, 0);
> > -    case Intrinsic::r600_read_ngroups_y:
> > +    case Intrinsic::amdgpu_read_ngroups_y:
> >        return LowerImplicitParameter(DAG, VT, DL, 1);
> > -    case Intrinsic::r600_read_ngroups_z:
> > +    case Intrinsic::amdgpu_read_ngroups_z:
> >        return LowerImplicitParameter(DAG, VT, DL, 2);
> > -    case Intrinsic::r600_read_global_size_x:
> > +    case Intrinsic::amdgpu_read_global_size_x:
> >        return LowerImplicitParameter(DAG, VT, DL, 3);
> > -    case Intrinsic::r600_read_global_size_y:
> > +    case Intrinsic::amdgpu_read_global_size_y:
> >        return LowerImplicitParameter(DAG, VT, DL, 4);
> > -    case Intrinsic::r600_read_global_size_z:
> > +    case Intrinsic::amdgpu_read_global_size_z:
> >        return LowerImplicitParameter(DAG, VT, DL, 5);
> > -    case Intrinsic::r600_read_local_size_x:
> > +    case Intrinsic::amdgpu_read_local_size_x:
> >        return LowerImplicitParameter(DAG, VT, DL, 6);
> > -    case Intrinsic::r600_read_local_size_y:
> > +    case Intrinsic::amdgpu_read_local_size_y:
> >        return LowerImplicitParameter(DAG, VT, DL, 7);
> > -    case Intrinsic::r600_read_local_size_z:
> > +    case Intrinsic::amdgpu_read_local_size_z:
> >        return LowerImplicitParameter(DAG, VT, DL, 8);
> >  
> > -    case Intrinsic::AMDGPU_read_workdim:
> > -      return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
> > +    case Intrinsic::amdgpu_read_workdim:
> > +      return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::WORK_DIM)/ 4);
> > +    case Intrinsic::amdgpu_read_global_offset_x:
> > +      return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_X) / 4);
> > +    case Intrinsic::amdgpu_read_global_offset_y:
> > +      return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Y) / 4);
> > +    case Intrinsic::amdgpu_read_global_offset_z:
> > +      return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Z) / 4);
> >  
> > -    case Intrinsic::r600_read_tgid_x:
> > +    case Intrinsic::amdgpu_read_tgid_x:
> >        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> >                                    AMDGPU::T1_X, VT);
> > -    case Intrinsic::r600_read_tgid_y:
> > +    case Intrinsic::amdgpu_read_tgid_y:
> >        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> >                                    AMDGPU::T1_Y, VT);
> > -    case Intrinsic::r600_read_tgid_z:
> > +    case Intrinsic::amdgpu_read_tgid_z:
> >        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> >                                    AMDGPU::T1_Z, VT);
> > -    case Intrinsic::r600_read_tidig_x:
> > +    case Intrinsic::amdgpu_read_tidig_x:
> >        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> >                                    AMDGPU::T0_X, VT);
> > -    case Intrinsic::r600_read_tidig_y:
> > +    case Intrinsic::amdgpu_read_tidig_y:
> >        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> >                                    AMDGPU::T0_Y, VT);
> > -    case Intrinsic::r600_read_tidig_z:
> > +    case Intrinsic::amdgpu_read_tidig_z:
> >        return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
> >                                    AMDGPU::T0_Z, VT);
> > -    case Intrinsic::AMDGPU_rsq:
> > +    case Intrinsic::amdgpu_rsq:
> >        // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
> >        return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
> >      }
> > diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> > index af38c94..fc8ffc2 100644
> > --- a/lib/Target/R600/SIISelLowering.cpp
> > +++ b/lib/Target/R600/SIISelLowering.cpp
> > @@ -32,6 +32,7 @@
> >  #include "llvm/CodeGen/SelectionDAG.h"
> >  #include "llvm/IR/Function.h"
> >  #include "llvm/ADT/SmallString.h"
> > +#include "llvm/Support/KernelABI.h"
> >  
> >  using namespace llvm;
> >  
> > @@ -851,55 +852,67 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
> >    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
> >  
> >    switch (IntrinsicID) {
> > -  case Intrinsic::r600_read_ngroups_x:
> > +  case Intrinsic::amdgpu_read_ngroups_x:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::NGROUPS_X, false);
> > -  case Intrinsic::r600_read_ngroups_y:
> > +  case Intrinsic::amdgpu_read_ngroups_y:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::NGROUPS_Y, false);
> > -  case Intrinsic::r600_read_ngroups_z:
> > +  case Intrinsic::amdgpu_read_ngroups_z:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::NGROUPS_Z, false);
> > -  case Intrinsic::r600_read_global_size_x:
> > +  case Intrinsic::amdgpu_read_global_size_x:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
> > -  case Intrinsic::r600_read_global_size_y:
> > +  case Intrinsic::amdgpu_read_global_size_y:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
> > -  case Intrinsic::r600_read_global_size_z:
> > +  case Intrinsic::amdgpu_read_global_size_z:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
> > -  case Intrinsic::r600_read_local_size_x:
> > +  case Intrinsic::amdgpu_read_local_size_x:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::LOCAL_SIZE_X, false);
> > -  case Intrinsic::r600_read_local_size_y:
> > +  case Intrinsic::amdgpu_read_local_size_y:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
> > -  case Intrinsic::r600_read_local_size_z:
> > +  case Intrinsic::amdgpu_read_local_size_z:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> >                            SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
> >  
> > -  case Intrinsic::AMDGPU_read_workdim:
> > +  case Intrinsic::amdgpu_read_workdim:
> >      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> > -                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
> > +                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::WORK_DIM,
> > +                          false);
> > +  case Intrinsic::amdgpu_read_global_offset_x:
> > +    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> > +                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_X,
> > +                          false);
> > +  case Intrinsic::amdgpu_read_global_offset_y:
> > +    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> > +                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Y,
> > +                          false);
> > +  case Intrinsic::amdgpu_read_global_offset_z:
> > +    return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
> > +                          MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Z,
> >                            false);
> >  
> > -  case Intrinsic::r600_read_tgid_x:
> > +  case Intrinsic::amdgpu_read_tgid_x:
> >      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
> >        TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
> > -  case Intrinsic::r600_read_tgid_y:
> > +  case Intrinsic::amdgpu_read_tgid_y:
> >      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
> >        TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
> > -  case Intrinsic::r600_read_tgid_z:
> > +  case Intrinsic::amdgpu_read_tgid_z:
> >      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
> >        TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
> > -  case Intrinsic::r600_read_tidig_x:
> > +  case Intrinsic::amdgpu_read_tidig_x:
> >      return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
> >        TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
> > -  case Intrinsic::r600_read_tidig_y:
> > +  case Intrinsic::amdgpu_read_tidig_y:
> >      return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
> >        TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
> > -  case Intrinsic::r600_read_tidig_z:
> > +  case Intrinsic::amdgpu_read_tidig_z:
> >      return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
> >        TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
> >    case AMDGPUIntrinsic::SI_load_const: {
> > diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
> > index 00d92c8..1c6c16e 100644
> > --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
> > +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
> > @@ -1028,7 +1028,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
> >      break;
> >    }
> >  
> > -  case Intrinsic::AMDGPU_rcp: {
> > +  case Intrinsic::amdgpu_rcp: {
> >      if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
> >        const APFloat &ArgVal = C->getValueAPF();
> >        APFloat Val(ArgVal.getSemantics(), 1.0);
> 
> > _______________________________________________
> > llvm-commits mailing list
> > llvm-commits at cs.uiuc.edu
> > http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits



More information about the llvm-commits mailing list