R600 intrinsics patch
Ronie Salgado
roniesalg at gmail.com
Mon Mar 16 22:04:27 PDT 2015
Hello,
This patch implements OpenCL 1.1 get_global_offset() for clover and also makes
it possible to call the amdgpu intrinsics from clang builtin functions. The
discussion concerning this patch is here:
https://bugs.freedesktop.org/show_bug.cgi?id=86326
Greetings,
Ronie
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150317/3d805ce9/attachment.html>
-------------- next part --------------
diff --git a/include/llvm/IR/IntrinsicsNVVM.td b/include/llvm/IR/IntrinsicsNVVM.td
index 9deed41..f7eab7d 100644
--- a/include/llvm/IR/IntrinsicsNVVM.td
+++ b/include/llvm/IR/IntrinsicsNVVM.td
@@ -3693,6 +3693,21 @@ class PTXReadSpecialRegisterIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
+multiclass PTXReadSpecialParameterIntrinsic_v4i32<string prefix> { // One i32 read intrinsic per component, named <prefix>_{x,y,z,w}.
+ def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_x")>;
+ def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_y")>;
+ def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_z")>;
+ def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<!strconcat(prefix, "_w")>; // NOTE(review): this patch adds NVPTX patterns only for _x/_y/_z of global_offset; confirm _w is selectable or drop it.
+}
+
+class PTXReadSpecialParameterIntrinsic_r32<string name>
+ : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
+ GCCBuiltin<name>;
+
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_tid">;
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
@@ -3744,3 +3759,9 @@ def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
GCCBuiltin<"__builtin_ptx_bar_sync">;
+
+def int_ptx_read_workdim : PTXReadSpecialParameterIntrinsic_r32
+ <"__builtin_ptx_read_workdim">;
+
+defm int_ptx_read_global_offset : PTXReadSpecialParameterIntrinsic_v4i32
+ <"__builtin_ptx_read_global_offset">;
diff --git a/include/llvm/IR/IntrinsicsR600.td b/include/llvm/IR/IntrinsicsR600.td
index 5055667..6130c7d 100644
--- a/include/llvm/IR/IntrinsicsR600.td
+++ b/include/llvm/IR/IntrinsicsR600.td
@@ -11,37 +11,36 @@
//
//===----------------------------------------------------------------------===//
-let TargetPrefix = "r600" in {
+let TargetPrefix = "amdgpu" in {
-class R600ReadPreloadRegisterIntrinsic<string name>
+class AMDGPUReadPreloadRegisterIntrinsic<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
-multiclass R600ReadPreloadRegisterIntrinsic_xyz<string prefix> {
- def _x : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
- def _y : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
- def _z : R600ReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
+multiclass AMDGPUReadPreloadRegisterIntrinsic_xyz<string prefix> {
+ def _x : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_x")>;
+ def _y : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_y")>;
+ def _z : AMDGPUReadPreloadRegisterIntrinsic<!strconcat(prefix, "_z")>;
}
-defm int_r600_read_global_size : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_global_size">;
-defm int_r600_read_local_size : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_local_size">;
-defm int_r600_read_ngroups : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_ngroups">;
-defm int_r600_read_tgid : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_tgid">;
-defm int_r600_read_tidig : R600ReadPreloadRegisterIntrinsic_xyz <
- "__builtin_r600_read_tidig">;
-} // End TargetPrefix = "r600"
-
-let TargetPrefix = "AMDGPU" in {
+defm int_amdgpu_read_global_size : AMDGPUReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_amdgpu_read_global_size">;
+defm int_amdgpu_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_amdgpu_read_local_size">;
+defm int_amdgpu_read_ngroups : AMDGPUReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_amdgpu_read_ngroups">;
+defm int_amdgpu_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_amdgpu_read_tgid">;
+defm int_amdgpu_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_amdgpu_read_tidig">;
+
+def int_amdgpu_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
+ "__builtin_amdgpu_read_workdim">;
-class AMDGPUReadPreloadRegisterIntrinsic<string name>
- : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
- GCCBuiltin<name>;
+defm int_amdgpu_read_global_offset : AMDGPUReadPreloadRegisterIntrinsic_xyz <
+ "__builtin_amdgpu_read_global_offset">;
-def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
+def int_amdgpu_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
// 1st parameter: Numerator
// 2nd parameter: Denominator
// 3rd parameter: Constant to select select between first and
@@ -50,36 +49,33 @@ def int_AMDGPU_div_scale : GCCBuiltin<"__builtin_amdgpu_div_scale">,
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
[IntrNoMem]>;
-def int_AMDGPU_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
+def int_amdgpu_div_fmas : GCCBuiltin<"__builtin_amdgpu_div_fmas">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>, llvm_i1_ty],
[IntrNoMem]>;
-def int_AMDGPU_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
+def int_amdgpu_div_fixup : GCCBuiltin<"__builtin_amdgpu_div_fixup">,
Intrinsic<[llvm_anyfloat_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem]>;
-def int_AMDGPU_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
+def int_amdgpu_trig_preop : GCCBuiltin<"__builtin_amdgpu_trig_preop">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem]>;
-def int_AMDGPU_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
+def int_amdgpu_rcp : GCCBuiltin<"__builtin_amdgpu_rcp">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
+def int_amdgpu_rsq : GCCBuiltin<"__builtin_amdgpu_rsq">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
+def int_amdgpu_rsq_clamped : GCCBuiltin<"__builtin_amdgpu_rsq_clamped">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
-def int_AMDGPU_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
+def int_amdgpu_ldexp : GCCBuiltin<"__builtin_amdgpu_ldexp">,
Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_i32_ty], [IntrNoMem]>;
-def int_AMDGPU_class : GCCBuiltin<"__builtin_amdgpu_class">,
+def int_amdgpu_class : GCCBuiltin<"__builtin_amdgpu_class">,
Intrinsic<[llvm_i1_ty], [llvm_anyfloat_ty, llvm_i32_ty], [IntrNoMem]>;
-def int_AMDGPU_read_workdim : AMDGPUReadPreloadRegisterIntrinsic <
- "__builtin_amdgpu_read_workdim">;
-
} // End TargetPrefix = "AMDGPU"
diff --git a/include/llvm/Support/KernelABI.h b/include/llvm/Support/KernelABI.h
new file mode 100644
index 0000000..39b73a2
--- /dev/null
+++ b/include/llvm/Support/KernelABI.h
@@ -0,0 +1,34 @@
+//===-- llvm/Support/KernelABI.h - Kernel ABI Constants ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains constants used for implementing the interface between a
+// GPGPU kernel and the host CPU.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_KERNELABI_H
+#define LLVM_SUPPORT_KERNELABI_H
+
+namespace llvm {
+namespace KernelABI {
+
+namespace InputOffsets {
+
+/// Byte offsets, past the kernel arguments, of the implicit kernel inputs.
+enum Offsets {
+ WORK_DIM = 0, // Added to ABIArgOffset when lowering amdgpu_read_workdim.
+ GLOBAL_OFFSET_X = 4, // Added to ABIArgOffset for amdgpu_read_global_offset_x.
+ GLOBAL_OFFSET_Y = 8, // Added to ABIArgOffset for amdgpu_read_global_offset_y.
+ GLOBAL_OFFSET_Z = 12, // Added to ABIArgOffset for amdgpu_read_global_offset_z.
+};
+
+} // End of namespace InputOffsets
+} // End of namespace KernelABI
+} // End of namespace llvm
+#endif
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index b09198e..6314918 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -2063,6 +2063,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
// individually present in Ins.
// So a different index should be used for indexing into Ins.
// See similar issue in LowerCall.
+ // TODO: Support the kernel ABI inputs (presumably those in llvm/Support/KernelABI.h).
unsigned InsIdx = 0;
int idx = 0;
diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td
index 14e51aa..687c46d 100644
--- a/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -7041,6 +7041,23 @@ def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
+
+// Special purpose parameter
+class PTX_READ_SPECIAL_PARAMETER_R32<string paramname, Intrinsic intop>
+ : NVPTXInst<(outs Int32Regs:$d), (ins),
+ !strconcat(!strconcat("ld.param.u32\t$d, [", paramname), "];"),
+ [(set Int32Regs:$d, (intop))]>;
+
+def PTX_READ_WORKDIM : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_workdim",
+ int_ptx_read_workdim>;
+
+def PTX_READ_GLOBAL_OFFSET_X : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_global_offset_x",
+ int_ptx_read_global_offset_x>;
+def PTX_READ_GLOBAL_OFFSET_Y : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_global_offset_y",
+ int_ptx_read_global_offset_y>;
+def PTX_READ_GLOBAL_OFFSET_Z : PTX_READ_SPECIAL_PARAMETER_R32<"__builtin_param_global_offset_z",
+ int_ptx_read_global_offset_z>;
+
// PTX Parallel Synchronization and Communication Intrinsics
def PTX_BAR_SYNC : NVPTXInst<(outs), (ins i32imm:$i), "bar.sync\t$i;",
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 4707279..cc9188d 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -894,7 +894,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::AMDGPU_div_scale: {
+ case Intrinsic::amdgpu_div_scale: {
// 3rd parameter required to be a constant.
const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
if (!Param)
@@ -916,29 +916,29 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
Denominator, Numerator);
}
- case Intrinsic::AMDGPU_div_fmas:
+ case Intrinsic::amdgpu_div_fmas:
return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3),
Op.getOperand(4));
- case Intrinsic::AMDGPU_div_fixup:
+ case Intrinsic::amdgpu_div_fixup:
return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT,
Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
- case Intrinsic::AMDGPU_trig_preop:
+ case Intrinsic::amdgpu_trig_preop:
return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT,
Op.getOperand(1), Op.getOperand(2));
- case Intrinsic::AMDGPU_rcp:
+ case Intrinsic::amdgpu_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
- case Intrinsic::AMDGPU_rsq:
+ case Intrinsic::amdgpu_rsq:
return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_legacy_rsq:
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
- case Intrinsic::AMDGPU_rsq_clamped:
+ case Intrinsic::amdgpu_rsq_clamped:
if (Subtarget->getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
Type *Type = VT.getTypeForEVT(*DAG.getContext());
APFloat Max = APFloat::getLargest(Type->getFltSemantics());
@@ -953,7 +953,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1));
}
- case Intrinsic::AMDGPU_ldexp:
+ case Intrinsic::amdgpu_ldexp:
return DAG.getNode(AMDGPUISD::LDEXP, DL, VT, Op.getOperand(1),
Op.getOperand(2));
@@ -1024,7 +1024,7 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case AMDGPUIntrinsic::AMDGPU_brev:
return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1));
- case Intrinsic::AMDGPU_class:
+ case Intrinsic::amdgpu_class:
return DAG.getNode(AMDGPUISD::FP_CLASS, DL, VT,
Op.getOperand(1), Op.getOperand(2));
diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp
index d4f3145..6df1df9 100644
--- a/lib/Target/R600/R600ISelLowering.cpp
+++ b/lib/Target/R600/R600ISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"
+#include "llvm/Support/KernelABI.h"
using namespace llvm;
@@ -794,47 +795,53 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
}
- case Intrinsic::r600_read_ngroups_x:
+ case Intrinsic::amdgpu_read_ngroups_x:
return LowerImplicitParameter(DAG, VT, DL, 0);
- case Intrinsic::r600_read_ngroups_y:
+ case Intrinsic::amdgpu_read_ngroups_y:
return LowerImplicitParameter(DAG, VT, DL, 1);
- case Intrinsic::r600_read_ngroups_z:
+ case Intrinsic::amdgpu_read_ngroups_z:
return LowerImplicitParameter(DAG, VT, DL, 2);
- case Intrinsic::r600_read_global_size_x:
+ case Intrinsic::amdgpu_read_global_size_x:
return LowerImplicitParameter(DAG, VT, DL, 3);
- case Intrinsic::r600_read_global_size_y:
+ case Intrinsic::amdgpu_read_global_size_y:
return LowerImplicitParameter(DAG, VT, DL, 4);
- case Intrinsic::r600_read_global_size_z:
+ case Intrinsic::amdgpu_read_global_size_z:
return LowerImplicitParameter(DAG, VT, DL, 5);
- case Intrinsic::r600_read_local_size_x:
+ case Intrinsic::amdgpu_read_local_size_x:
return LowerImplicitParameter(DAG, VT, DL, 6);
- case Intrinsic::r600_read_local_size_y:
+ case Intrinsic::amdgpu_read_local_size_y:
return LowerImplicitParameter(DAG, VT, DL, 7);
- case Intrinsic::r600_read_local_size_z:
+ case Intrinsic::amdgpu_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case Intrinsic::AMDGPU_read_workdim:
- return LowerImplicitParameter(DAG, VT, DL, MFI->ABIArgOffset / 4);
+ case Intrinsic::amdgpu_read_workdim:
+ return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::WORK_DIM)/ 4);
+ case Intrinsic::amdgpu_read_global_offset_x:
+ return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_X) / 4);
+ case Intrinsic::amdgpu_read_global_offset_y:
+ return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Y) / 4);
+ case Intrinsic::amdgpu_read_global_offset_z:
+ return LowerImplicitParameter(DAG, VT, DL, (MFI->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Z) / 4);
- case Intrinsic::r600_read_tgid_x:
+ case Intrinsic::amdgpu_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
- case Intrinsic::r600_read_tgid_y:
+ case Intrinsic::amdgpu_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Y, VT);
- case Intrinsic::r600_read_tgid_z:
+ case Intrinsic::amdgpu_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_Z, VT);
- case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::amdgpu_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_X, VT);
- case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::amdgpu_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Y, VT);
- case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgpu_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T0_Z, VT);
- case Intrinsic::AMDGPU_rsq:
+ case Intrinsic::amdgpu_rsq:
// XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index af38c94..fc8ffc2 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Function.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/KernelABI.h"
using namespace llvm;
@@ -851,55 +852,67 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
switch (IntrinsicID) {
- case Intrinsic::r600_read_ngroups_x:
+ case Intrinsic::amdgpu_read_ngroups_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_X, false);
- case Intrinsic::r600_read_ngroups_y:
+ case Intrinsic::amdgpu_read_ngroups_y:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Y, false);
- case Intrinsic::r600_read_ngroups_z:
+ case Intrinsic::amdgpu_read_ngroups_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::NGROUPS_Z, false);
- case Intrinsic::r600_read_global_size_x:
+ case Intrinsic::amdgpu_read_global_size_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_X, false);
- case Intrinsic::r600_read_global_size_y:
+ case Intrinsic::amdgpu_read_global_size_y:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Y, false);
- case Intrinsic::r600_read_global_size_z:
+ case Intrinsic::amdgpu_read_global_size_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::GLOBAL_SIZE_Z, false);
- case Intrinsic::r600_read_local_size_x:
+ case Intrinsic::amdgpu_read_local_size_x:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::LOCAL_SIZE_X, false);
- case Intrinsic::r600_read_local_size_y:
+ case Intrinsic::amdgpu_read_local_size_y:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::LOCAL_SIZE_Y, false);
- case Intrinsic::r600_read_local_size_z:
+ case Intrinsic::amdgpu_read_local_size_z:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
SI::KernelInputOffsets::LOCAL_SIZE_Z, false);
- case Intrinsic::AMDGPU_read_workdim:
+ case Intrinsic::amdgpu_read_workdim:
return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
- MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset,
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::WORK_DIM,
+ false);
+ case Intrinsic::amdgpu_read_global_offset_x:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_X,
+ false);
+ case Intrinsic::amdgpu_read_global_offset_y:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Y,
+ false);
+ case Intrinsic::amdgpu_read_global_offset_z:
+ return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(),
+ MF.getInfo<SIMachineFunctionInfo>()->ABIArgOffset + KernelABI::InputOffsets::GLOBAL_OFFSET_Z,
false);
- case Intrinsic::r600_read_tgid_x:
+ case Intrinsic::amdgpu_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_X), VT);
- case Intrinsic::r600_read_tgid_y:
+ case Intrinsic::amdgpu_read_tgid_y:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Y), VT);
- case Intrinsic::r600_read_tgid_z:
+ case Intrinsic::amdgpu_read_tgid_z:
return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TGID_Z), VT);
- case Intrinsic::r600_read_tidig_x:
+ case Intrinsic::amdgpu_read_tidig_x:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_X), VT);
- case Intrinsic::r600_read_tidig_y:
+ case Intrinsic::amdgpu_read_tidig_y:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Y), VT);
- case Intrinsic::r600_read_tidig_z:
+ case Intrinsic::amdgpu_read_tidig_z:
return CreateLiveInRegister(DAG, &AMDGPU::VGPR_32RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::TIDIG_Z), VT);
case AMDGPUIntrinsic::SI_load_const: {
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 00d92c8..1c6c16e 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1028,7 +1028,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::AMDGPU_rcp: {
+ case Intrinsic::amdgpu_rcp: {
if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) {
const APFloat &ArgVal = C->getValueAPF();
APFloat Val(ArgVal.getSemantics(), 1.0);
More information about the llvm-commits
mailing list