r315624 - [CUDA] Added __hmma_m16n16k16_* builtins to support mma instructions on sm_70
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Thu Oct 12 14:32:19 PDT 2017
Author: tra
Date: Thu Oct 12 14:32:19 2017
New Revision: 315624
URL: http://llvm.org/viewvc/llvm-project?rev=315624&view=rev
Log:
[CUDA] Added __hmma_m16n16k16_* builtins to support mma instructions on sm_70
Differential Revision: https://reviews.llvm.org/D38742
Added:
cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu
Modified:
cfe/trunk/include/clang/Basic/BuiltinsNVPTX.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
Modified: cfe/trunk/include/clang/Basic/BuiltinsNVPTX.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsNVPTX.def?rev=315624&r1=315623&r2=315624&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsNVPTX.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsNVPTX.def Thu Oct 12 14:32:19 2017
@@ -688,5 +688,18 @@ BUILTIN(__nvvm_ldg_f2, "E2fE2fC*", "")
BUILTIN(__nvvm_ldg_f4, "E4fE4fC*", "")
BUILTIN(__nvvm_ldg_d2, "E2dE2dC*", "")
+// Builtins to support WMMA instructions on sm_70
+TARGET_BUILTIN(__hmma_m16n16k16_ld_a, "vi*iC*UiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_ld_b, "vi*iC*UiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f16, "vi*iC*UiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_ld_c_f32, "vf*fC*UiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f16, "vi*i*UiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_st_c_f32, "vf*f*UiIi", "", "ptx60")
+
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f16, "vi*iC*iC*iC*IiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f16, "vf*iC*iC*iC*IiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f32f32, "vf*iC*iC*fC*IiIi", "", "ptx60")
+TARGET_BUILTIN(__hmma_m16n16k16_mma_f16f32, "vi*iC*iC*fC*IiIi", "", "ptx60")
+
#undef BUILTIN
#undef TARGET_BUILTIN
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=315624&r1=315623&r2=315624&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Oct 12 14:32:19 2017
@@ -9731,6 +9731,204 @@ Value *CodeGenFunction::EmitNVPTXBuiltin
Builder.CreateStore(Pred, PredOutPtr);
return Builder.CreateExtractValue(ResultPair, 0);
}
+ case NVPTX::BI__hmma_m16n16k16_ld_a:
+ case NVPTX::BI__hmma_m16n16k16_ld_b:
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f32: {
+ Address Dst = EmitPointerWithAlignment(E->getArg(0));
+ Value *Src = EmitScalarExpr(E->getArg(1));
+ Value *Ldm = EmitScalarExpr(E->getArg(2));
+ llvm::APSInt isColMajorArg;
+ if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
+ return nullptr;
+ bool isColMajor = isColMajorArg.getSExtValue();
+ unsigned IID;
+ unsigned NumResults;
+ switch (BuiltinID) {
+ case NVPTX::BI__hmma_m16n16k16_ld_a:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_ld_b:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ NumResults = 8;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ NumResults = 8;
+ break;
+ default:
+ llvm_unreachable("Unexpected builtin ID.");
+ }
+ Value *Result =
+ Builder.CreateCall(CGM.getIntrinsic(IID),
+ {Builder.CreatePointerCast(Src, VoidPtrTy), Ldm});
+
+ // Save returned values.
+ for (unsigned i = 0; i < NumResults; ++i) {
+ Builder.CreateAlignedStore(
+ Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
+ Dst.getElementType()),
+ Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ }
+ return Result;
+ }
+
+ case NVPTX::BI__hmma_m16n16k16_st_c_f16:
+ case NVPTX::BI__hmma_m16n16k16_st_c_f32: {
+ Value *Dst = EmitScalarExpr(E->getArg(0));
+ Address Src = EmitPointerWithAlignment(E->getArg(1));
+ Value *Ldm = EmitScalarExpr(E->getArg(2));
+ llvm::APSInt isColMajorArg;
+ if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
+ return nullptr;
+ bool isColMajor = isColMajorArg.getSExtValue();
+ unsigned IID;
+ unsigned NumResults = 8;
+ // PTX Instructions (and LLVM instrinsics) are defined for slice _d_, yet
+ // for some reason nvcc builtins use _c_.
+ switch (BuiltinID) {
+ case NVPTX::BI__hmma_m16n16k16_st_c_f16:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ NumResults = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_st_c_f32:
+ IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ break;
+ default:
+ llvm_unreachable("Unexpected builtin ID.");
+ }
+ Function *Intrinsic = CGM.getIntrinsic(IID);
+ llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
+ SmallVector<Value *, 10> Values;
+ Values.push_back(Builder.CreatePointerCast(Dst, VoidPtrTy));
+ for (unsigned i = 0; i < NumResults; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, ParamType));
+ }
+ Values.push_back(Ldm);
+ Value *Result = Builder.CreateCall(Intrinsic, Values);
+ return Result;
+ }
+
+ // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
+ // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f32: {
+ Address Dst = EmitPointerWithAlignment(E->getArg(0));
+ Address SrcA = EmitPointerWithAlignment(E->getArg(1));
+ Address SrcB = EmitPointerWithAlignment(E->getArg(2));
+ Address SrcC = EmitPointerWithAlignment(E->getArg(3));
+ llvm::APSInt LayoutArg;
+ if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
+ return nullptr;
+ int Layout = LayoutArg.getSExtValue();
+ if (Layout < 0 || Layout > 3)
+ return nullptr;
+ llvm::APSInt SatfArg;
+ if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
+ return nullptr;
+ bool Satf = SatfArg.getSExtValue();
+
+ // clang-format off
+#define MMA_VARIANTS(type) {{ \
+ Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
+ Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+ }}
+ // clang-format on
+
+ auto getMMAIntrinsic = [Layout, Satf](std::array<unsigned, 8> Variants) {
+ unsigned Index = Layout * 2 + Satf;
+ assert(Index < 8);
+ return Variants[Index];
+ };
+ unsigned IID;
+ unsigned NumEltsC;
+ unsigned NumEltsD;
+ switch (BuiltinID) {
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f16_f16));
+ NumEltsC = 4;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f32_f16));
+ NumEltsC = 4;
+ NumEltsD = 8;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f16_f32));
+ NumEltsC = 8;
+ NumEltsD = 4;
+ break;
+ case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
+ IID = getMMAIntrinsic(MMA_VARIANTS(f32_f32));
+ NumEltsC = 8;
+ NumEltsD = 8;
+ break;
+ default:
+ llvm_unreachable("Unexpected builtin ID.");
+ }
+#undef MMA_VARIANTS
+
+ SmallVector<Value *, 24> Values;
+ Function *Intrinsic = CGM.getIntrinsic(IID);
+ llvm::Type *ABType = Intrinsic->getFunctionType()->getParamType(0);
+ // Load A
+ for (unsigned i = 0; i < 8; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(SrcA.getPointer(),
+ llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, ABType));
+ }
+ // Load B
+ for (unsigned i = 0; i < 8; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(SrcB.getPointer(),
+ llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, ABType));
+ }
+ // Load C
+ llvm::Type *CType = Intrinsic->getFunctionType()->getParamType(16);
+ for (unsigned i = 0; i < NumEltsC; ++i) {
+ Value *V = Builder.CreateAlignedLoad(
+ Builder.CreateGEP(SrcC.getPointer(),
+ llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ Values.push_back(Builder.CreateBitCast(V, CType));
+ }
+ Value *Result = Builder.CreateCall(Intrinsic, Values);
+ llvm::Type *DType = Dst.getElementType();
+ for (unsigned i = 0; i < NumEltsD; ++i)
+ Builder.CreateAlignedStore(
+ Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
+ Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
+ CharUnits::fromQuantity(4));
+ return Result;
+ }
default:
return nullptr;
}
Added: cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu?rev=315624&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu (added)
+++ cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu Thu Oct 12 14:32:19 2017
@@ -0,0 +1,166 @@
+// RUN: %clang_cc1 -triple nvptx64-unknown-unknown -target-cpu sm_70 \
+// RUN: -fcuda-is-device -target-feature +ptx60 \
+// RUN: -S -emit-llvm -o - -x cuda %s \
+// RUN: | FileCheck -check-prefix=CHECK %s
+// RUN: %clang_cc1 -triple nvptx-unknown-unknown -target-cpu sm_60 \
+// RUN: -fcuda-is-device -S -o /dev/null -x cuda -verify %s
+
+#if !defined(CUDA_VERSION)
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+
+typedef unsigned long long uint64_t;
+#endif
+// We have to keep all builtins that depend on particular target feature in the
+// same function, because the codegen will stop after the very first function
+// that encounters an error, so -verify will not be able to find errors in
+// subsequent functions.
+
+// CHECK-LABEL: nvvm_wmma
+__device__ void nvvm_wmma(int *src, int *dst,
+ float *fsrc, float *fdst,
+ int ldm) {
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.row.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_a(dst, src, ldm, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.col.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.row.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_b(dst, src, ldm, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.col.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
+ __hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
+ __hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
+ __hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
+ __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
+ __hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
+
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32.satfinite
+ // expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
+ __hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
+}
More information about the cfe-commits
mailing list