R600 intrinsics patch
Ronie Salgado
roniesalg at gmail.com
Mon Mar 16 22:07:29 PDT 2015
Hello,
This patch makes it possible to use some of the R600/AMDGPU intrinsics from
Clang, and also uses amdgpu instead of r600. Part of the discussion
concerning this patch is here:
https://bugs.freedesktop.org/show_bug.cgi?id=86326
Greetings,
Ronie
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150317/e7d20769/attachment.html>
-------------- next part --------------
diff --git a/include/clang/Basic/BuiltinsNVPTX.def b/include/clang/Basic/BuiltinsNVPTX.def
index 9c920dc..a72db13 100644
--- a/include/clang/Basic/BuiltinsNVPTX.def
+++ b/include/clang/Basic/BuiltinsNVPTX.def
@@ -59,6 +59,11 @@ BUILTIN(__builtin_ptx_read_pm3, "i", "n")
BUILTIN(__builtin_ptx_bar_sync, "vi", "n")
+BUILTIN(__builtin_ptx_read_workdim, "i", "nc")
+
+BUILTIN(__builtin_ptx_read_global_offset_x, "i", "nc")
+BUILTIN(__builtin_ptx_read_global_offset_y, "i", "nc")
+BUILTIN(__builtin_ptx_read_global_offset_z, "i", "nc")
// Builtins exposed as part of NVVM
// MISC
diff --git a/include/clang/Basic/BuiltinsR600.def b/include/clang/Basic/BuiltinsR600.def
index 84fc4fa..ad4361b 100644
--- a/include/clang/Basic/BuiltinsR600.def
+++ b/include/clang/Basic/BuiltinsR600.def
@@ -14,6 +14,26 @@
// The format of this database matches clang/Basic/Builtins.def.
+BUILTIN(__builtin_amdgpu_read_global_size_x, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_global_size_y, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_global_size_z, "i", "nc")
+
+BUILTIN(__builtin_amdgpu_read_local_size_x, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_local_size_y, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_local_size_z, "i", "nc")
+
+BUILTIN(__builtin_amdgpu_read_ngroups_x, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_ngroups_y, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_ngroups_z, "i", "nc")
+
+BUILTIN(__builtin_amdgpu_read_tgid_x, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_tgid_y, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_tgid_z, "i", "nc")
+
+BUILTIN(__builtin_amdgpu_read_tidig_x, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_tidig_y, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_tidig_z, "i", "nc")
+
BUILTIN(__builtin_amdgpu_div_scale, "dddbb*", "n")
BUILTIN(__builtin_amdgpu_div_scalef, "fffbb*", "n")
BUILTIN(__builtin_amdgpu_div_fmas, "ddddb", "nc")
@@ -33,4 +53,9 @@ BUILTIN(__builtin_amdgpu_ldexpf, "ffi", "nc")
BUILTIN(__builtin_amdgpu_class, "bdi", "nc")
BUILTIN(__builtin_amdgpu_classf, "bfi", "nc")
+BUILTIN(__builtin_amdgpu_read_workdim, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_global_offset_x, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_global_offset_y, "i", "nc")
+BUILTIN(__builtin_amdgpu_read_global_offset_z, "i", "nc")
+
#undef BUILTIN
diff --git a/lib/CodeGen/CGBuiltin.cpp b/lib/CodeGen/CGBuiltin.cpp
index 4c79a34..6d58e04 100644
--- a/lib/CodeGen/CGBuiltin.cpp
+++ b/lib/CodeGen/CGBuiltin.cpp
@@ -6428,7 +6428,7 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
llvm::Value *Y = EmitScalarExpr(E->getArg(1));
llvm::Value *Z = EmitScalarExpr(E->getArg(2));
- llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::AMDGPU_div_scale,
+ llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgpu_div_scale,
X->getType());
llvm::Value *Tmp = Builder.CreateCall3(Callee, X, Y, Z);
@@ -6451,32 +6451,32 @@ Value *CodeGenFunction::EmitR600BuiltinExpr(unsigned BuiltinID,
llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
- llvm::Value *F = CGM.getIntrinsic(Intrinsic::AMDGPU_div_fmas,
+ llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgpu_div_fmas,
Src0->getType());
llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
return Builder.CreateCall4(F, Src0, Src1, Src2, Src3ToBool);
}
case R600::BI__builtin_amdgpu_div_fixup:
case R600::BI__builtin_amdgpu_div_fixupf:
- return emitTernaryFPBuiltin(*this, E, Intrinsic::AMDGPU_div_fixup);
+ return emitTernaryFPBuiltin(*this, E, Intrinsic::amdgpu_div_fixup);
case R600::BI__builtin_amdgpu_trig_preop:
case R600::BI__builtin_amdgpu_trig_preopf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_trig_preop);
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgpu_trig_preop);
case R600::BI__builtin_amdgpu_rcp:
case R600::BI__builtin_amdgpu_rcpf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rcp);
+ return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgpu_rcp);
case R600::BI__builtin_amdgpu_rsq:
case R600::BI__builtin_amdgpu_rsqf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq);
+ return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgpu_rsq);
case R600::BI__builtin_amdgpu_rsq_clamped:
case R600::BI__builtin_amdgpu_rsq_clampedf:
- return emitUnaryFPBuiltin(*this, E, Intrinsic::AMDGPU_rsq_clamped);
+ return emitUnaryFPBuiltin(*this, E, Intrinsic::amdgpu_rsq_clamped);
case R600::BI__builtin_amdgpu_ldexp:
case R600::BI__builtin_amdgpu_ldexpf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_ldexp);
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgpu_ldexp);
case R600::BI__builtin_amdgpu_class:
case R600::BI__builtin_amdgpu_classf:
- return emitFPIntBuiltin(*this, E, Intrinsic::AMDGPU_class);
+ return emitFPIntBuiltin(*this, E, Intrinsic::amdgpu_class);
default:
return nullptr;
}
More information about the cfe-commits
mailing list