[llvm] ae3c981 - [NVPTX] Enforce half type support is present for builtins
Jakub Chlanda via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 27 23:48:18 PDT 2023
Author: Jakub Chlanda
Date: 2023-03-28T08:48:10+02:00
New Revision: ae3c981aa4b85cfae6531ba50df7ad84feebe43c
URL: https://github.com/llvm/llvm-project/commit/ae3c981aa4b85cfae6531ba50df7ad84feebe43c
DIFF: https://github.com/llvm/llvm-project/commit/ae3c981aa4b85cfae6531ba50df7ad84feebe43c.diff
LOG: [NVPTX] Enforce half type support is present for builtins
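The half-type check that previously guarded only the ldg/ldu builtins is
hoisted into a shared MakeHalfType helper, and every f16/f16x2 builtin
(ex2.approx, the ff2f16x2 conversions, fma, fmin, fmax, ldg, ldu) is routed
through it, so each one now reports a diagnostic when native half support is
missing. A minimal sketch of the user-visible effect, distilled from the
updated test below (pick_max is a hypothetical name, not code from this
patch): compiled for a target without native half support, the call is
rejected at CodeGen time.

  #define __device__ __attribute__((device))

  // Rejected unless the frontend has native half type support:
  // error: __nvvm_fmax_f16 requires native half type support.
  __device__ __fp16 pick_max(__fp16 x, __fp16 y) {
    return __nvvm_fmax_f16(x, y);
  }
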
Differential Revision: https://reviews.llvm.org/D146715
Added:
Modified:
clang/lib/CodeGen/CGBuiltin.cpp
clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
llvm/include/llvm/IR/IntrinsicsNVVM.td
Removed:
################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c8112b0ea0ec0..f399b0770143a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18162,32 +18162,63 @@ static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
#undef MMA_VARIANTS_B1_XOR
}
+static Value *MakeLdgLdu(unsigned IntrinsicID, CodeGenFunction &CGF,
+ const CallExpr *E) {
+ Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
+ QualType ArgType = E->getArg(0)->getType();
+ clang::CharUnits Align = CGF.CGM.getNaturalPointeeTypeAlignment(ArgType);
+ llvm::Type *ElemTy = CGF.ConvertTypeForMem(ArgType->getPointeeType());
+ return CGF.Builder.CreateCall(
+ CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
+ {Ptr, ConstantInt::get(CGF.Builder.getInt32Ty(), Align.getQuantity())});
+}
+
+static Value *MakeScopedAtomic(unsigned IntrinsicID, CodeGenFunction &CGF,
+ const CallExpr *E) {
+ Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Type *ElemTy =
+ CGF.ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
+ return CGF.Builder.CreateCall(
+ CGF.CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
+ {Ptr, CGF.EmitScalarExpr(E->getArg(1))});
+}
+
+static Value *MakeHalfType(unsigned IntrinsicID, unsigned BuiltinID,
+ const CallExpr *E, CodeGenFunction &CGF) {
+ auto &C = CGF.CGM.getContext();
+ if (!(C.getLangOpts().NativeHalfType ||
+ !C.getTargetInfo().useFP16ConversionIntrinsics())) {
+ CGF.CGM.Error(E->getExprLoc(), C.BuiltinInfo.getName(BuiltinID).str() +
+ " requires native half type support.");
+ return nullptr;
+ }
+
+ if (IntrinsicID == Intrinsic::nvvm_ldg_global_f ||
+ IntrinsicID == Intrinsic::nvvm_ldu_global_f)
+ return MakeLdgLdu(IntrinsicID, CGF, E);
+
+ SmallVector<Value *, 16> Args;
+ auto *F = CGF.CGM.getIntrinsic(IntrinsicID);
+ auto *FTy = F->getFunctionType();
+ unsigned ICEArguments = 0;
+ ASTContext::GetBuiltinTypeError Error;
+ C.GetBuiltinType(BuiltinID, Error, &ICEArguments);
+ assert(Error == ASTContext::GE_None && "Should not codegen an error");
+ for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
+ assert((ICEArguments & (1 << i)) == 0);
+ auto *ArgValue = CGF.EmitScalarExpr(E->getArg(i));
+ auto *PTy = FTy->getParamType(i);
+ if (PTy != ArgValue->getType())
+ ArgValue = CGF.Builder.CreateBitCast(ArgValue, PTy);
+ Args.push_back(ArgValue);
+ }
+
+ return CGF.Builder.CreateCall(F, Args);
+}
} // namespace
-Value *
-CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
- auto HasHalfSupport = [&](unsigned BuiltinID) {
- auto &Context = getContext();
- return Context.getLangOpts().NativeHalfType ||
- !Context.getTargetInfo().useFP16ConversionIntrinsics();
- };
- auto MakeLdgLdu = [&](unsigned IntrinsicID) {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- QualType ArgType = E->getArg(0)->getType();
- clang::CharUnits Align = CGM.getNaturalPointeeTypeAlignment(ArgType);
- llvm::Type *ElemTy = ConvertTypeForMem(ArgType->getPointeeType());
- return Builder.CreateCall(
- CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
- {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
- };
- auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
- llvm::Type *ElemTy =
- ConvertTypeForMem(E->getArg(0)->getType()->getPointeeType());
- return Builder.CreateCall(
- CGM.getIntrinsic(IntrinsicID, {ElemTy, Ptr->getType()}),
- {Ptr, EmitScalarExpr(E->getArg(1))});
- };
+Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
+ const CallExpr *E) {
switch (BuiltinID) {
case NVPTX::BI__nvvm_atom_add_gen_i:
case NVPTX::BI__nvvm_atom_add_gen_l:
@@ -18297,22 +18328,13 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
// PTX Interoperability section 2.2: "For a vector with an even number of
// elements, its alignment is set to number of elements times the alignment
// of its member: n*alignof(t)."
- return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i);
- case NVPTX::BI__nvvm_ldg_h:
- case NVPTX::BI__nvvm_ldg_h2:
- if (!HasHalfSupport(BuiltinID)) {
- CGM.Error(E->getExprLoc(),
- getContext().BuiltinInfo.getName(BuiltinID).str() +
- " requires native half type support.");
- return nullptr;
- }
- [[fallthrough]];
+ return MakeLdgLdu(Intrinsic::nvvm_ldg_global_i, *this, E);
case NVPTX::BI__nvvm_ldg_f:
case NVPTX::BI__nvvm_ldg_f2:
case NVPTX::BI__nvvm_ldg_f4:
case NVPTX::BI__nvvm_ldg_d:
case NVPTX::BI__nvvm_ldg_d2:
- return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f);
+ return MakeLdgLdu(Intrinsic::nvvm_ldg_global_f, *this, E);
case NVPTX::BI__nvvm_ldu_c:
case NVPTX::BI__nvvm_ldu_c2:
@@ -18338,105 +18360,96 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
case NVPTX::BI__nvvm_ldu_ul:
case NVPTX::BI__nvvm_ldu_ull:
case NVPTX::BI__nvvm_ldu_ull2:
- return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i);
- case NVPTX::BI__nvvm_ldu_h:
- case NVPTX::BI__nvvm_ldu_h2:
- if (!HasHalfSupport(BuiltinID)) {
- CGM.Error(E->getExprLoc(),
- getContext().BuiltinInfo.getName(BuiltinID).str() +
- " requires native half type support.");
- return nullptr;
- }
- [[fallthrough]];
+ return MakeLdgLdu(Intrinsic::nvvm_ldu_global_i, *this, E);
case NVPTX::BI__nvvm_ldu_f:
case NVPTX::BI__nvvm_ldu_f2:
case NVPTX::BI__nvvm_ldu_f4:
case NVPTX::BI__nvvm_ldu_d:
case NVPTX::BI__nvvm_ldu_d2:
- return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f);
+ return MakeLdgLdu(Intrinsic::nvvm_ldu_global_f, *this, E);
case NVPTX::BI__nvvm_atom_cta_add_gen_i:
case NVPTX::BI__nvvm_atom_cta_add_gen_l:
case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_add_gen_i:
case NVPTX::BI__nvvm_atom_sys_add_gen_l:
case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_add_gen_f:
case NVPTX::BI__nvvm_atom_cta_add_gen_d:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_add_gen_f:
case NVPTX::BI__nvvm_atom_sys_add_gen_d:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_max_gen_i:
case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
case NVPTX::BI__nvvm_atom_cta_max_gen_l:
case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_max_gen_i:
case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
case NVPTX::BI__nvvm_atom_sys_max_gen_l:
case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_min_gen_i:
case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
case NVPTX::BI__nvvm_atom_cta_min_gen_l:
case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_min_gen_i:
case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
case NVPTX::BI__nvvm_atom_sys_min_gen_l:
case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_and_gen_i:
case NVPTX::BI__nvvm_atom_cta_and_gen_l:
case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_and_gen_i:
case NVPTX::BI__nvvm_atom_sys_and_gen_l:
case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_or_gen_i:
case NVPTX::BI__nvvm_atom_cta_or_gen_l:
case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_or_gen_i:
case NVPTX::BI__nvvm_atom_sys_or_gen_l:
case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta, *this, E);
case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
- return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
+ return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys, *this, E);
case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
@@ -18701,6 +18714,138 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
CharUnits::fromQuantity(4));
return Result;
}
+ // The following builtins require half type support
+ case NVPTX::BI__nvvm_ex2_approx_f16:
+ return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ex2_approx_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_ex2_approx_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ff2f16x2_rn:
+ return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ff2f16x2_rn_relu:
+ return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rn_relu, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ff2f16x2_rz:
+ return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ff2f16x2_rz_relu:
+ return MakeHalfType(Intrinsic::nvvm_ff2f16x2_rz_relu, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_f16:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_ftz_f16:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_ftz_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fma_rn_ftz_relu_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_relu_f16x2, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fma_rn_ftz_sat_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_ftz_sat_f16x2, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fma_rn_relu_f16:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_relu_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_relu_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_sat_f16:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fma_rn_sat_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fma_rn_sat_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_ftz_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_ftz_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_ftz_nan_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_ftz_nan_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_f16x2, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16, BuiltinID,
+ E, *this);
+ case NVPTX::BI__nvvm_fmax_ftz_nan_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_f16x2,
+ BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmax_ftz_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_ftz_xorsign_abs_f16x2, BuiltinID,
+ E, *this);
+ case NVPTX::BI__nvvm_fmax_nan_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_nan_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_nan_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmax_nan_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_nan_xorsign_abs_f16x2, BuiltinID,
+ E, *this);
+ case NVPTX::BI__nvvm_fmax_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmax_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmax_xorsign_abs_f16x2, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmin_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_ftz_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_ftz_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_ftz_nan_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_ftz_nan_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_f16x2, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16, BuiltinID,
+ E, *this);
+ case NVPTX::BI__nvvm_fmin_ftz_nan_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_f16x2,
+ BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmin_ftz_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_ftz_xorsign_abs_f16x2, BuiltinID,
+ E, *this);
+ case NVPTX::BI__nvvm_fmin_nan_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_nan_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_nan_f16x2, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmin_nan_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_nan_xorsign_abs_f16x2, BuiltinID,
+ E, *this);
+ case NVPTX::BI__nvvm_fmin_xorsign_abs_f16:
+ return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_fmin_xorsign_abs_f16x2:
+ return MakeHalfType(Intrinsic::nvvm_fmin_xorsign_abs_f16x2, BuiltinID, E,
+ *this);
+ case NVPTX::BI__nvvm_ldg_h:
+ return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ldg_h2:
+ return MakeHalfType(Intrinsic::nvvm_ldg_global_f, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ldu_h:
+ return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
+ case NVPTX::BI__nvvm_ldu_h2: {
+ return MakeHalfType(Intrinsic::nvvm_ldu_global_f, BuiltinID, E, *this);
+ }
default:
return nullptr;
}
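The guard at the top of MakeHalfType mirrors the HasHalfSupport lambda it
replaces: a half-type builtin is accepted only when the language options
enable a native half type, or when the target does not lower __fp16 through
float-conversion intrinsics. A standalone sketch of that predicate
(hasNativeHalfSupport is a hypothetical name; the two queries are the ones
the patch uses):

  #include "clang/AST/ASTContext.h"

  // True when __fp16 is a genuine arithmetic type for this target, i.e.
  // the frontend need not expand it via float conversion intrinsics.
  static bool hasNativeHalfSupport(const clang::ASTContext &C) {
    return C.getLangOpts().NativeHalfType ||
           !C.getTargetInfo().useFP16ConversionIntrinsics();
  }

For the ldg/ldu forms, MakeHalfType forwards to MakeLdgLdu, which passes the
natural pointee alignment as an explicit i32 operand; per the PTX
interoperability rule quoted in the patch, a two-element __fp16 vector gets
alignment 2 * alignof(__fp16) = 4 bytes.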
diff --git a/clang/test/CodeGen/builtins-nvptx-native-half-type-err.c b/clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
index 5c13c4e9b454c..7e23d3354b023 100644
--- a/clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
+++ b/clang/test/CodeGen/builtins-nvptx-native-half-type-err.c
@@ -1,21 +1,119 @@
// REQUIRES: nvptx-registered-target
//
// RUN: not %clang_cc1 -fsyntax-only -ffp-contract=off -triple nvptx-unknown-unknown -target-cpu \
-// RUN: sm_75 -target-feature +ptx70 -fcuda-is-device -x cuda -emit-llvm -o - %s 2>&1 \
-// RUN: | FileCheck -check-prefix=CHECK-ERROR %s
+// RUN: sm_86 -target-feature +ptx72 -fcuda-is-device -x cuda -emit-llvm -o - %s 2>&1 \
+// RUN: | FileCheck -check-prefix=CHECK_ERROR %s
#define __device__ __attribute__((device))
typedef __fp16 __fp16v2 __attribute__((ext_vector_type(2)));
-__device__ void nvvm_ldg_ldu_native_half_types(const void *p) {
- __nvvm_ldg_h((const __fp16 *)p);
- __nvvm_ldg_h2((const __fp16v2 *)p);
+__device__ void nvvm_native_half_types(void *a, void*b, void*c, __fp16* out) {
+ __fp16v2 resv2 = {0, 0};
+ *out += __nvvm_ex2_approx_f16(*(__fp16 *)a);
+ resv2 = __nvvm_ex2_approx_f16x2(*(__fp16v2*)a);
- __nvvm_ldu_h((const __fp16 *)p);
- __nvvm_ldu_h2((const __fp16v2 *)p);
+ *out += __nvvm_fma_rn_relu_f16(*(__fp16*)a, *(__fp16*)b, *(__fp16*)c);
+ *out += __nvvm_fma_rn_ftz_relu_f16(*(__fp16*)a, *(__fp16*)b, *(__fp16 *)c);
+ resv2 += __nvvm_fma_rn_relu_f16x2(*(__fp16v2*)a, *(__fp16v2*)b, *(__fp16v2*)c);
+ resv2 += __nvvm_fma_rn_ftz_relu_f16x2(*(__fp16v2*)a, *(__fp16v2*)b, *(__fp16v2*)c);
+ *out += __nvvm_fma_rn_ftz_f16(*(__fp16*)a, *(__fp16*)b, *(__fp16*)c);
+ *out += __nvvm_fma_rn_sat_f16(*(__fp16*)a, *(__fp16*)b, *(__fp16*)c);
+ *out += __nvvm_fma_rn_ftz_sat_f16(*(__fp16*)a, *(__fp16*)b, *(__fp16*)c);
+ resv2 += __nvvm_fma_rn_f16x2(*(__fp16v2*)a, *(__fp16v2*)b, *(__fp16v2*)c);
+ resv2 += __nvvm_fma_rn_ftz_f16x2(*(__fp16v2*)a, *(__fp16v2*)b, *(__fp16v2*)c);
+ resv2 += __nvvm_fma_rn_sat_f16x2(*(__fp16v2*)a, *(__fp16v2*)b, *(__fp16v2*)c);
+ resv2 += __nvvm_fma_rn_ftz_sat_f16x2(*(__fp16v2*)a, *(__fp16v2*)b, *(__fp16v2*)c);
+
+ *out += __nvvm_fmin_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmin_ftz_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmin_nan_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmin_ftz_nan_f16(*(__fp16*)a, *(__fp16*)b);
+ resv2 += __nvvm_fmin_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ resv2 += __nvvm_fmin_ftz_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ resv2 += __nvvm_fmin_nan_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ resv2 += __nvvm_fmin_ftz_nan_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ *out += __nvvm_fmin_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmin_ftz_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmin_nan_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmin_ftz_nan_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ resv2 += __nvvm_fmin_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+ resv2 += __nvvm_fmin_ftz_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+ resv2 += __nvvm_fmin_nan_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+ resv2 += __nvvm_fmin_ftz_nan_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+
+ *out += __nvvm_fmax_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmax_ftz_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmax_nan_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmax_ftz_nan_f16(*(__fp16*)a, *(__fp16*)b);
+ resv2 += __nvvm_fmax_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ resv2 += __nvvm_fmax_ftz_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ resv2 += __nvvm_fmax_nan_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ resv2 += __nvvm_fmax_ftz_nan_f16x2(*(__fp16v2*)a , *(__fp16v2*)b);
+ *out += __nvvm_fmax_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmax_ftz_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmax_nan_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ *out += __nvvm_fmax_ftz_nan_xorsign_abs_f16(*(__fp16*)a, *(__fp16*)b);
+ resv2 += __nvvm_fmax_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+ resv2 += __nvvm_fmax_ftz_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+ resv2 += __nvvm_fmax_nan_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+ resv2 += __nvvm_fmax_ftz_nan_xorsign_abs_f16x2(*(__fp16v2*)a, *(__fp16v2*)b);
+
+ *out += __nvvm_ldg_h((__fp16 *)a);
+ resv2 += __nvvm_ldg_h2((__fp16v2 *)a);
+
+ *out += __nvvm_ldu_h((__fp16 *)a);
+ resv2 += __nvvm_ldu_h2((__fp16v2 *)a);
+
+ *out += resv2[0] + resv2[1];
}
-// CHECK-ERROR: error: __nvvm_ldg_h requires native half type support.
-// CHECK-ERROR: error: __nvvm_ldg_h2 requires native half type support.
-// CHECK-ERROR: error: __nvvm_ldu_h requires native half type support.
-// CHECK-ERROR: error: __nvvm_ldu_h2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_ex2_approx_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_ex2_approx_f16x2 requires native half type support.
+
+// CHECK_ERROR: error: __nvvm_fma_rn_relu_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_ftz_relu_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_relu_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_ftz_relu_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_ftz_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_sat_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_ftz_sat_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_ftz_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_sat_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fma_rn_ftz_sat_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_nan_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_nan_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_nan_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_nan_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_nan_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_nan_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_nan_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmin_ftz_nan_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_nan_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_nan_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_nan_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_nan_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_nan_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_nan_xorsign_abs_f16 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_nan_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_fmax_ftz_nan_xorsign_abs_f16x2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_ldg_h requires native half type support.
+// CHECK_ERROR: error: __nvvm_ldg_h2 requires native half type support.
+// CHECK_ERROR: error: __nvvm_ldu_h requires native half type support.
+// CHECK_ERROR: error: __nvvm_ldu_h2 requires native half type support.
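For contrast, the same calls compile cleanly once native half support is
enabled. A minimal sketch of a passing RUN configuration (assuming the cc1
flag -fnative-half-type, which sets LangOpts.NativeHalfType; the non-error
companion test, builtins-nvptx-native-half-type.c, exercises this mode):

  // RUN: %clang_cc1 -ffp-contract=off -triple nvptx64-unknown-unknown \
  // RUN:   -target-cpu sm_86 -target-feature +ptx72 -fcuda-is-device \
  // RUN:   -fnative-half-type -x cuda -emit-llvm -o - %s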
diff --git a/llvm/include/llvm/IR/IntrinsicsNVVM.td b/llvm/include/llvm/IR/IntrinsicsNVVM.td
index b859958e90040..e0cb64ccc9942 100644
--- a/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -583,7 +583,6 @@ let TargetPrefix = "nvvm" in {
"_xorsign_abs_f16", "_ftz_xorsign_abs_f16", "_nan_xorsign_abs_f16",
"_ftz_nan_xorsign_abs_f16"] in {
def int_nvvm_f # operation # variant :
- ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty, llvm_half_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
}
@@ -592,7 +591,6 @@ let TargetPrefix = "nvvm" in {
"_ftz_nan_f16x2", "_xorsign_abs_f16x2", "_ftz_xorsign_abs_f16x2",
"_nan_xorsign_abs_f16x2", "_ftz_nan_xorsign_abs_f16x2"] in {
def int_nvvm_f # operation # variant :
- ClangBuiltin<!strconcat("__nvvm_f", operation, variant)>,
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty, llvm_v2f16_ty],
[IntrNoMem, IntrSpeculatable, Commutative]>;
}
@@ -828,9 +826,9 @@ let TargetPrefix = "nvvm" in {
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_ex2_approx_d : ClangBuiltin<"__nvvm_ex2_approx_d">,
DefaultAttrsIntrinsic<[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_f16 : ClangBuiltin<"__nvvm_ex2_approx_f16">,
+ def int_nvvm_ex2_approx_f16 :
DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
- def int_nvvm_ex2_approx_f16x2 : ClangBuiltin<"__nvvm_ex2_approx_f16x2">,
+ def int_nvvm_ex2_approx_f16x2 :
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;
def int_nvvm_lg2_approx_ftz_f : ClangBuiltin<"__nvvm_lg2_approx_ftz_f">,
@@ -860,18 +858,16 @@ let TargetPrefix = "nvvm" in {
foreach variant = ["_rn_f16", "_rn_ftz_f16", "_rn_sat_f16",
"_rn_ftz_sat_f16", "_rn_relu_f16", "_rn_ftz_relu_f16"] in {
- def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
- DefaultAttrsIntrinsic<[llvm_half_ty],
- [llvm_half_ty, llvm_half_ty, llvm_half_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_half_ty],
+ [llvm_half_ty, llvm_half_ty, llvm_half_ty],
+ [IntrNoMem, IntrSpeculatable]>;
}
foreach variant = ["_rn_f16x2", "_rn_ftz_f16x2", "_rn_sat_f16x2",
"_rn_ftz_sat_f16x2", "_rn_relu_f16x2", "_rn_ftz_relu_f16x2"] in {
- def int_nvvm_fma # variant : ClangBuiltin<!strconcat("__nvvm_fma", variant)>,
- DefaultAttrsIntrinsic<[llvm_v2f16_ty],
- [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
- [IntrNoMem, IntrSpeculatable]>;
+ def int_nvvm_fma # variant : DefaultAttrsIntrinsic<[llvm_v2f16_ty],
+ [llvm_v2f16_ty, llvm_v2f16_ty, llvm_v2f16_ty],
+ [IntrNoMem, IntrSpeculatable]>;
}
foreach variant = ["_rn_bf16", "_rn_relu_bf16"] in {