[llvm-branch-commits] [clang] [AMDGPU][clang] Replace gfx940 and gfx941 with gfx942 in clang (PR #126762)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Feb 11 08:43:00 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Fabian Ritter (ritter-x2a)

<details>
<summary>Changes</summary>

gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.

This PR removes every occurrence of gfx940/gfx941 in clang that can be
removed without also changing the llvm directory. The
target-invalid-cpu-note/amdgcn.c test is not updated here because the
list of targets it checks is defined in
llvm/lib/TargetParser/TargetParser.cpp.

For SWDEV-512631

---

The patch is 41.59 KiB and is truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/126762.diff


20 Files Affected:

- (modified) clang/include/clang/Basic/Cuda.h (-2) 
- (modified) clang/lib/Basic/Cuda.cpp (-2) 
- (modified) clang/lib/Basic/Targets/NVPTX.cpp (-2) 
- (modified) clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp (-2) 
- (modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+1-1) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-features.cl (-4) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl (+1-1) 
- (renamed) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl (+55-55) 
- (renamed) clang/test/CodeGenOpenCL/builtins-fp-atomics-gfx942.cl (+17-17) 
- (modified) clang/test/Driver/amdgpu-macros.cl (-2) 
- (modified) clang/test/Driver/amdgpu-mcpu.cl (-4) 
- (modified) clang/test/Driver/cuda-bad-arch.cu (+1-1) 
- (modified) clang/test/Driver/hip-macros.hip (+4-6) 
- (modified) clang/test/Misc/target-invalid-cpu-note/nvptx.c (-2) 
- (renamed) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx942-param.cl (+1-1) 
- (modified) clang/test/SemaOpenCL/builtins-amdgcn-error-gfx950.cl (+1-1) 
- (renamed) clang/test/SemaOpenCL/builtins-amdgcn-gfx942-err.cl (+7-7) 


``````````diff
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index f33ba46233a7a..793cab1f4e84a 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -106,8 +106,6 @@ enum class OffloadArch {
   GFX90a,
   GFX90c,
   GFX9_4_GENERIC,
-  GFX940,
-  GFX941,
   GFX942,
   GFX950,
   GFX10_1_GENERIC,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 1bfec0b37c5ee..f45fb0eca3714 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -124,8 +124,6 @@ static const OffloadArchToStringMap arch_names[] = {
     GFX(90a),  // gfx90a
     GFX(90c),  // gfx90c
     {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
-    GFX(940),  // gfx940
-    GFX(941),  // gfx941
     GFX(942),  // gfx942
     GFX(950),  // gfx950
     {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 7d13c1f145440..547cf3dfa2be7 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -211,8 +211,6 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
       case OffloadArch::GFX90a:
       case OffloadArch::GFX90c:
       case OffloadArch::GFX9_4_GENERIC:
-      case OffloadArch::GFX940:
-      case OffloadArch::GFX941:
       case OffloadArch::GFX942:
       case OffloadArch::GFX950:
       case OffloadArch::GFX10_1_GENERIC:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index c13928f61a748..826ec4da8ea28 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -2302,8 +2302,6 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(const OMPRequiresDecl *D) {
       case OffloadArch::GFX90a:
       case OffloadArch::GFX90c:
       case OffloadArch::GFX9_4_GENERIC:
-      case OffloadArch::GFX940:
-      case OffloadArch::GFX941:
       case OffloadArch::GFX942:
       case OffloadArch::GFX950:
       case OffloadArch::GFX10_1_GENERIC:
diff --git a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
index 47fa3967fe237..37fca614c3111 100644
--- a/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu
@@ -11,7 +11,7 @@
 // RUN:   -fnative-half-arguments-and-returns | FileCheck -check-prefix=SAFE %s
 
 // RUN: %clang_cc1 -x hip %s -O3 -S -o - -triple=amdgcn-amd-amdhsa \
-// RUN:   -fcuda-is-device -target-cpu gfx940 -fnative-half-type \
+// RUN:   -fcuda-is-device -target-cpu gfx942 -fnative-half-type \
 // RUN:   -fnative-half-arguments-and-returns -munsafe-fp-atomics \
 // RUN:   | FileCheck -check-prefix=UNSAFE %s
 
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 633f1dec5e370..d12dcead6fadf 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -29,8 +29,6 @@
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
-// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx950 -emit-llvm -o - %s | FileCheck --check-prefix=GFX950 %s
 // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
@@ -85,8 +83,6 @@
 // GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
 // GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
 // GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
 // GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
index 6593a8de566f6..f300b05fe798a 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp8.cl
@@ -1,5 +1,5 @@
 // REQUIRES: amdgpu-registered-target
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940  -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942  -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1200 -emit-llvm -o - %s | FileCheck %s
 
 typedef float  v2f   __attribute__((ext_vector_type(2)));
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl
similarity index 98%
rename from clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl
rename to clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl
index a2f14c652c828..789f6e07240d7 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx940.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx942.cl
@@ -1,5 +1,5 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
-// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx940 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -cl-std=CL2.0 -O0 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
 // REQUIRES: amdgpu-registered-target
 
 typedef unsigned int u32;
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl
index 521121f5e7e54..c91cf158948b9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gfx950-err.cl
@@ -2,7 +2,7 @@
 // RUN:   -verify -o - %s
 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx90a -emit-llvm \
 // RUN:   -verify -o - %s
-// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx940 -emit-llvm \
+// RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx942 -emit-llvm \
 // RUN:   -verify -o - %s
 // RUN: %clang_cc1 -O0 -cl-std=CL2.0 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm \
 // RUN:   -verify -o - %s
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl
index 45d2fa18efd53..b3367202f824e 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl
@@ -5,7 +5,7 @@
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx906 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90c -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1010 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1030 -emit-llvm -o - %s | FileCheck %s
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx1100 -emit-llvm -o - %s | FileCheck %s
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
index 00346baa6ff84..79083c3c5f0f9 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-mfma.cl
@@ -1,7 +1,7 @@
 // REQUIRES: amdgpu-registered-target
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx908 -DMFMA_GFX908_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX908
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx90a -DMFMA_GFX90A_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX90A
-// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx940 -DMFMA_GFX940_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX940
+// RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx942 -DMFMA_GFX942_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX942
 // RUN: %clang_cc1 -triple amdgcn-unknown-unknown -target-cpu gfx950 -DMFMA_GFX950_TESTS -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-GFX950
 
 #pragma OPENCL EXTENSION cl_khr_fp64:enable
@@ -226,189 +226,189 @@ void test_mfma_f64_4x4x4f64(global double* out, double a, double b, double c)
 
 #endif // MFMA_GFX90A_TESTS
 
-#if defined(MFMA_GFX940_TESTS) || defined(MFMA_GFX950_TESTS)
-// CHECK-GFX940-LABEL: @test_mfma_i32_16x16x32_i8
-// CHECK-GFX940: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
+#if defined(MFMA_GFX942_TESTS) || defined(MFMA_GFX950_TESTS)
+// CHECK-GFX942-LABEL: @test_mfma_i32_16x16x32_i8
+// CHECK-GFX942: call <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64 %a, i64 %b, <4 x i32> %c, i32 0, i32 0, i32 0)
 void test_mfma_i32_16x16x32_i8(global v4i* out, long a, long b, v4i c)
 {
   *out = __builtin_amdgcn_mfma_i32_16x16x32_i8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_i32_32x32x16_i8
-// CHECK-GFX940: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 %a, i64 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_i32_32x32x16_i8
+// CHECK-GFX942: call <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64 %a, i64 %b, <16 x i32> %c, i32 0, i32 0, i32 0)
 void test_mfma_i32_32x32x16_i8(global v16i* out, long a, long b, v16i c)
 {
   *out = __builtin_amdgcn_mfma_i32_32x32x16_i8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x8_xf32
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> %a, <2 x float> %b, <4 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x8_xf32
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> %a, <2 x float> %b, <4 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_16x16x8_xf32(global v4f* out, v2f a, v2f b, v4f c)
 {
   *out = __builtin_amdgcn_mfma_f32_16x16x8_xf32(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x4_xf32
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> %a, <2 x float> %b, <16 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x4_xf32
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> %a, <2 x float> %b, <16 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_32x32x4_xf32(global v16f* out, v2f a, v2f b, v16f c)
 {
   *out = __builtin_amdgcn_mfma_f32_32x32x4_xf32(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_bf8_bf8
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_bf8_bf8
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_16x16x32_bf8_bf8(global v4f* out, long a, long b, v4f c)
 {
   *out = __builtin_amdgcn_mfma_f32_16x16x32_bf8_bf8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_bf8_fp8
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_bf8_fp8
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.bf8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_16x16x32_bf8_fp8(global v4f* out, long a, long b, v4f c)
 {
   *out = __builtin_amdgcn_mfma_f32_16x16x32_bf8_fp8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_fp8_bf8
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_fp8_bf8
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.bf8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_16x16x32_fp8_bf8(global v4f* out, long a, long b, v4f c)
 {
   *out = __builtin_amdgcn_mfma_f32_16x16x32_fp8_bf8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_16x16x32_fp8_fp8
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_16x16x32_fp8_fp8
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.mfma.f32.16x16x32.fp8.fp8(i64 %a, i64 %b, <4 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_16x16x32_fp8_fp8(global v4f* out, long a, long b, v4f c)
 {
   *out = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_bf8_bf8
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_bf8_bf8
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_32x32x16_bf8_bf8(global v16f* out, long a, long b, v16f c)
 {
   *out = __builtin_amdgcn_mfma_f32_32x32x16_bf8_bf8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_bf8_fp8
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_bf8_fp8
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_32x32x16_bf8_fp8(global v16f* out, long a, long b, v16f c)
 {
   *out = __builtin_amdgcn_mfma_f32_32x32x16_bf8_fp8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_fp8_bf8
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_fp8_bf8
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.bf8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_32x32x16_fp8_bf8(global v16f* out, long a, long b, v16f c)
 {
   *out = __builtin_amdgcn_mfma_f32_32x32x16_fp8_bf8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_mfma_f32_32x32x16_fp8_fp8
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_mfma_f32_32x32x16_fp8_fp8
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.fp8.fp8(i64 %a, i64 %b, <16 x float> %c, i32 0, i32 0, i32 0)
 void test_mfma_f32_32x32x16_fp8_fp8(global v16f* out, long a, long b, v16f c)
 {
   *out = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8(a, b, c, 0, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x32_f16
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x32_f16
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.f16(<4 x half> %a, <8 x half> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
 void test_smfmac_f32_16x16x32_f16(global v4f* out, v4h a, v8h b, v4f c, int idx)
 {
   *out = __builtin_amdgcn_smfmac_f32_16x16x32_f16(a, b, c, idx, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x16_f16
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x16_f16
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.f16(<4 x half> %a, <8 x half> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
 void test_smfmac_f32_32x32x16_f16(global v16f* out, v4h a, v8h b, v16f c, int idx)
 {
   *out = __builtin_amdgcn_smfmac_f32_32x32x16_f16(a, b, c, idx, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_smfmac_f32_16x16x32_bf16
-// CHECK-GFX940: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_smfmac_f32_16x16x32_bf16
+// CHECK-GFX942: call <4 x float> @llvm.amdgcn.smfmac.f32.16x16x32.bf16(<4 x i16> %a, <8 x i16> %b, <4 x float> %c, i32 %idx, i32 0, i32 0)
 void test_smfmac_f32_16x16x32_bf16(global v4f* out, v4s a, v8s b, v4f c, int idx)
 {
   *out = __builtin_amdgcn_smfmac_f32_16x16x32_bf16(a, b, c, idx, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_smfmac_f32_32x32x16_bf16
-// CHECK-GFX940: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
+// CHECK-GFX942-LABEL: @test_smfmac_f32_32x32x16_bf16
+// CHECK-GFX942: call <16 x float> @llvm.amdgcn.smfmac.f32.32x32x16.bf16(<4 x i16> %a, <8 x i16> %b, <16 x float> %c, i32 %idx, i32 0, i32 0)
 void test_smfmac_f32_32x32x16_bf16(global v16f* out, v4s a, v8s b, v16f c, int idx)
 {
   *out = __builtin_amdgcn_smfmac_f32_32x32x16_bf16(a, b, c, idx, 0, 0);
 }
 
-// CHECK-GFX940-LABEL: @test_smfmac_i32...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/126762


More information about the llvm-branch-commits mailing list