r328158 - [NVPTX] Make tensor shape part of WMMA intrinsic's name.
Artem Belevich via cfe-commits
cfe-commits at lists.llvm.org
Wed Mar 21 14:55:02 PDT 2018
Author: tra
Date: Wed Mar 21 14:55:02 2018
New Revision: 328158
URL: http://llvm.org/viewvc/llvm-project?rev=328158&view=rev
Log:
[NVPTX] Make tensor shape part of WMMA intrinsic's name.
This is needed for the upcoming implementation of the
new 8x32x16 and 32x8x16 variants of WMMA instructions
introduced in CUDA 9.1.
Differential Revision: https://reviews.llvm.org/D44719
Modified:
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=328158&r1=328157&r2=328158&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Wed Mar 21 14:55:02 2018
@@ -10515,23 +10515,23 @@ Value *CodeGenFunction::EmitNVPTXBuiltin
unsigned NumResults;
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_ld_a:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_a_f16_col_stride
- : Intrinsic::nvvm_wmma_load_a_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_b:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_b_f16_col_stride
- : Intrinsic::nvvm_wmma_load_b_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_b_f16_row_stride;
NumResults = 8;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f16_col_stride
- : Intrinsic::nvvm_wmma_load_c_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_load_c_f32_col_stride
- : Intrinsic::nvvm_wmma_load_c_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_load_c_f32_row_stride;
NumResults = 8;
break;
default:
@@ -10566,13 +10566,13 @@ Value *CodeGenFunction::EmitNVPTXBuiltin
// for some reason nvcc builtins use _c_.
switch (BuiltinID) {
case NVPTX::BI__hmma_m16n16k16_st_c_f16:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f16_col_stride
- : Intrinsic::nvvm_wmma_store_d_f16_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f16_row_stride;
NumResults = 4;
break;
case NVPTX::BI__hmma_m16n16k16_st_c_f32:
- IID = isColMajor ? Intrinsic::nvvm_wmma_store_d_f32_col_stride
- : Intrinsic::nvvm_wmma_store_d_f32_row_stride;
+ IID = isColMajor ? Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_col_stride
+ : Intrinsic::nvvm_wmma_m16n16k16_store_d_f32_row_stride;
break;
default:
llvm_unreachable("Unexpected builtin ID.");
@@ -10591,8 +10591,8 @@ Value *CodeGenFunction::EmitNVPTXBuiltin
return Result;
}
- // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf)
- // --> Intrinsic::nvvm_wmma_mma_sync<layout A,B><DType><CType><Satf>
+ // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
+ // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
@@ -10613,15 +10613,15 @@ Value *CodeGenFunction::EmitNVPTXBuiltin
bool Satf = SatfArg.getSExtValue();
// clang-format off
-#define MMA_VARIANTS(type) {{ \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_row_col_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_row_##type##_satfinite, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type, \
- Intrinsic::nvvm_wmma_mma_sync_col_col_##type##_satfinite \
+#define MMA_VARIANTS(type) {{ \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_row_col_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_row_##type##_satfinite, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type, \
+ Intrinsic::nvvm_wmma_m16n16k16_mma_col_col_##type##_satfinite \
}}
// clang-format on
Modified: cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu?rev=328158&r1=328157&r2=328158&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu (original)
+++ cfe/trunk/test/CodeGen/builtins-nvptx-sm_70.cu Wed Mar 21 14:55:02 2018
@@ -22,145 +22,145 @@ typedef unsigned long long uint64_t;
__device__ void nvvm_wmma(int *src, int *dst,
float *fsrc, float *fdst,
int ldm) {
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.row.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
__hmma_m16n16k16_ld_a(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.a.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.a.col.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_ld_a' needs target feature ptx60}}
__hmma_m16n16k16_ld_a(dst, src+1, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.row.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
__hmma_m16n16k16_ld_b(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.b.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.b.col.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_ld_b' needs target feature ptx60}}
__hmma_m16n16k16_ld_b(dst, src+2, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_ld_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f16(dst, src, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.row.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.row.stride.f32
// expected-error at +1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.load.c.sync.col.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.load.c.col.stride.f32
// expected-error at +1 {{'__hmma_m16n16k16_ld_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_ld_c_f32(fdst, fsrc, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f16
// expected-error at +1 {{'__hmma_m16n16k16_st_c_f16' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f16(dst, src, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.row.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.row.stride.f32
// expected-error at +1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.store.d.sync.col.m16n16k16.stride.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.store.d.col.stride.f32
// expected-error at +1 {{'__hmma_m16n16k16_st_c_f32' needs target feature ptx60}}
__hmma_m16n16k16_st_c_f32(fdst, fsrc, ldm, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f16(dst, src, src, src, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f16.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f16.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f16.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f16.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f16.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f16f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f16f32(dst, src, src, fsrc, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f16.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f16.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f16' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f16(fdst, src, src, src, 3, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.row.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.row.f32.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 0, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.row.col.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.row.col.f32.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 1, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.row.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.row.f32.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 2, 1);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 0);
- // CHECK: call {{.*}} @llvm.nvvm.wmma.mma.sync.col.col.m16n16k16.f32.f32.satfinite
+ // CHECK: call {{.*}} @llvm.nvvm.wmma.m16n16k16.mma.col.col.f32.f32.satfinite
// expected-error at +1 {{'__hmma_m16n16k16_mma_f32f32' needs target feature ptx60}}
__hmma_m16n16k16_mma_f32f32(fdst, src, src, fsrc, 3, 1);
}
More information about the cfe-commits
mailing list