[llvm] [SPIRV] Support for the SPV_INTEL_subgroup_matrix_multiply_accumulate SPIR-V extension (PR #135225)
Vyacheslav Levytskyy via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 10:54:28 PDT 2025
https://github.com/VyacheslavLevytskyy created https://github.com/llvm/llvm-project/pull/135225
Adds support for the SPV_INTEL_subgroup_matrix_multiply_accumulate SPIR-V extension, as specified in https://github.com/KhronosGroup/SPIRV-Registry/blob/main/extensions/INTEL/SPV_INTEL_subgroup_matrix_multiply_accumulate.asciidoc. The extension adds an instruction that computes the matrix product of an M x K matrix with a K x N matrix and then adds an M x N matrix.
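As a rough illustration, here is a minimal OpenCL C sketch of how the builtin wrapper is invoked (it mirrors the mma.cl listing embedded in the test below; the __spirv_ declaration is user-provided there, K_Dim = 42 is an arbitrary value, and 0xA is simply the Operands literal the test uses):

    // Hedged sketch based on the mma.cl listing in the test below.
    // D (M x N) = A (M x K) * B (K x N) + C (M x N); only K is passed
    // explicitly, the remaining dimensions are implied by the operand
    // types and the subgroup size.
    int4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(
        int K_Dim, short4 Matrix_A, int8 Matrix_B, int4 Matrix_C,
        int Operands);

    int4 mma_example(short4 a, int8 b, int4 c) {
      // K = 42 and Operands = 0xA mirror the values used in the test;
      // see the extension spec for the meaning of the Operands bits.
      return __spirv_SubgroupMatrixMultiplyAccumulateINTEL(42, a, b, c, 0xA);
    }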
From 7080789d29a9db2122e332bead8ef34a55c9f7b9 Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Thu, 10 Apr 2025 09:48:02 -0700
Subject: [PATCH 1/2] support for SPV_INTEL_subgroup_matrix_multiply_accumulate
---
llvm/docs/SPIRVUsage.rst | 2 +
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 4 +
llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 3 +
llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 4 +
llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 14 ++
.../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 2 +
...roup_matrix_multiply_accumulate_generic.ll | 229 ++++++++++++++++++
7 files changed, 258 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll
diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst
index 406dfbea20b73..6ff8034cac00c 100644
--- a/llvm/docs/SPIRVUsage.rst
+++ b/llvm/docs/SPIRVUsage.rst
@@ -211,6 +211,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na
- Adds the ability to specify the maximum error for floating-point operations.
+ * - ``SPV_INTEL_subgroup_matrix_multiply_accumulate``
+ - Adds an instruction to compute the matrix product of an M x K matrix with a K x N matrix and then add an M x N matrix.
* - ``SPV_INTEL_ternary_bitwise_function``
- Adds a bitwise instruction on three operands and a look-up table index for specifying the bitwise operation to perform.
To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use:
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index b504e7b04d336..a3f27dde76b65 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -763,6 +763,7 @@ class GroupBuiltin<string name, Op operation> {
bit NoGroupOperation = !or(IsElect, IsAllOrAny, IsAllEqual,
IsBallot, IsInverseBallot,
IsBallotBitExtract, IsBallotFindBit,
+ !eq(operation, OpSubgroupMatrixMultiplyAccumulateINTEL),
!eq(operation, OpGroupNonUniformShuffle),
!eq(operation, OpGroupNonUniformShuffleXor),
!eq(operation, OpGroupNonUniformShuffleUp),
@@ -847,6 +848,9 @@ defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallotFindLSB", 2, 2
defm : DemangledGroupBuiltin<"group_ballot_find_msb", OnlySub, OpGroupNonUniformBallotFindMSB>;
defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformBallotFindMSB", 2, 2, OpGroupNonUniformBallotFindMSB>;
+// SPV_INTEL_subgroup_matrix_multiply_accumulate
+defm : DemangledGroupBuiltinWrapper<"__spirv_SubgroupMatrixMultiplyAccumulateINTEL", 4, 5, OpSubgroupMatrixMultiplyAccumulateINTEL>;
+
// cl_khr_subgroup_shuffle
defm : DemangledGroupBuiltin<"group_shuffle", OnlySub, OpGroupNonUniformShuffle>;
defm : DemangledGroupBuiltinWrapper<"__spirv_GroupNonUniformShuffle", 3, 3, OpGroupNonUniformShuffle>;
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 53e88aa485568..ad0bc5a904682 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -93,6 +93,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_INTEL_long_composites},
{"SPV_INTEL_fp_max_error",
SPIRV::Extension::Extension::SPV_INTEL_fp_max_error},
+ {"SPV_INTEL_subgroup_matrix_multiply_accumulate",
+ SPIRV::Extension::Extension::
+ SPV_INTEL_subgroup_matrix_multiply_accumulate},
{"SPV_INTEL_ternary_bitwise_function",
SPIRV::Extension::Extension::SPV_INTEL_ternary_bitwise_function}};
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 53064ebb51271..6d8c84945d7d4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -736,6 +736,10 @@ def OpGroupFMax: OpGroup<"FMax", 269>;
def OpGroupUMax: OpGroup<"UMax", 270>;
def OpGroupSMax: OpGroup<"SMax", 271>;
+def OpSubgroupMatrixMultiplyAccumulateINTEL: Op<6237, (outs ID:$res),
+ (ins TYPE:$ty, ID:$KDim, ID:$A, ID:$B, ID:$C, variable_ops),
+ "$res = OpSubgroupMatrixMultiplyAccumulateINTEL $ty $KDim $A $B $C">;
+
// TODO: 3.42.22. Device-Side Enqueue Instructions
def OpEnqueueKernel: Op<292, (outs ID:$res), (ins TYPE:$type, ID:$queue, ID:$flags, ID:$NDR, ID:$nevents, ID:$wevents,
ID:$revent, ID:$invoke, ID:$param, ID:$psize, ID:$palign, variable_ops),
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index b1e5e4328cd32..6e1c41d9f20cb 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1799,6 +1799,20 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::LongCompositesINTEL);
break;
}
+ case SPIRV::OpSubgroupMatrixMultiplyAccumulateINTEL: {
+ if (!ST.canUseExtension(
+ SPIRV::Extension::SPV_INTEL_subgroup_matrix_multiply_accumulate))
+ report_fatal_error(
+ "OpSubgroupMatrixMultiplyAccumulateINTEL instruction requires the "
+ "following SPIR-V "
+ "extension: SPV_INTEL_subgroup_matrix_multiply_accumulate",
+ false);
+ Reqs.addExtension(
+ SPIRV::Extension::SPV_INTEL_subgroup_matrix_multiply_accumulate);
+ Reqs.addCapability(
+ SPIRV::Capability::SubgroupMatrixMultiplyAccumulateINTEL);
+ break;
+ }
case SPIRV::OpBitwiseFunctionINTEL: {
if (!ST.canUseExtension(
SPIRV::Extension::SPV_INTEL_ternary_bitwise_function))
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 0db8a37f8683c..afd3a5206926c 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -314,6 +314,7 @@ defm SPV_INTEL_long_composites : ExtensionOperand<117>;
defm SPV_INTEL_memory_access_aliasing : ExtensionOperand<118>;
defm SPV_INTEL_fp_max_error : ExtensionOperand<119>;
defm SPV_INTEL_ternary_bitwise_function : ExtensionOperand<120>;
+defm SPV_INTEL_subgroup_matrix_multiply_accumulate : ExtensionOperand<121>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -515,6 +516,7 @@ defm BindlessImagesINTEL : CapabilityOperand<6528, 0, 0, [SPV_INTEL_bindless_ima
defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory_access_aliasing], []>;
defm FPMaxErrorINTEL : CapabilityOperand<6169, 0, 0, [SPV_INTEL_fp_max_error], []>;
defm TernaryBitwiseFunctionINTEL : CapabilityOperand<6241, 0, 0, [SPV_INTEL_ternary_bitwise_function], []>;
+defm SubgroupMatrixMultiplyAccumulateINTEL : CapabilityOperand<6236, 0, 0, [SPV_INTEL_subgroup_matrix_multiply_accumulate], []>;
//===----------------------------------------------------------------------===//
// Multiclass used to define SourceLanguage enum values and at the same time
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll
new file mode 100644
index 0000000000000..492b3f0181fba
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll
@@ -0,0 +1,229 @@
+; Adapted from Khronos Translator: subgroup_matrix_multiply_accumulate_generic.ll
+
+; generated with mma.cl:
+; #pragma OPENCL EXTENSION cl_khr_fp16 : enable
+;
+; // all combinations of parameter types
+; int __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int Matrix_A, int8 Matrix_B, int Matrix_C, int Operands);
+; int2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int2 Matrix_A, int8 Matrix_B, int2 Matrix_C, int Operands);
+; int4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int4 Matrix_A, int8 Matrix_B, int4 Matrix_C, int Operands);
+; int8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int8 Matrix_A, int8 Matrix_B, int8 Matrix_C, int Operands);
+;
+; float __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int Matrix_A, int8 Matrix_B, float Matrix_C, int Operands);
+; float2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int2 Matrix_A, int8 Matrix_B, float2 Matrix_C, int Operands);
+; float4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int4 Matrix_A, int8 Matrix_B, float4 Matrix_C, int Operands);
+; float8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, int8 Matrix_A, int8 Matrix_B, float8 Matrix_C, int Operands);
+;
+; int __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short Matrix_A, int8 Matrix_B, int Matrix_C, int Operands);
+; int2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short2 Matrix_A, int8 Matrix_B, int2 Matrix_C, int Operands);
+; int4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short4 Matrix_A, int8 Matrix_B, int4 Matrix_C, int Operands);
+; int8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short8 Matrix_A, int8 Matrix_B, int8 Matrix_C, int Operands);
+;
+; float __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short Matrix_A, int8 Matrix_B, float Matrix_C, int Operands);
+; float2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short2 Matrix_A, int8 Matrix_B, float2 Matrix_C, int Operands);
+; float4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short4 Matrix_A, int8 Matrix_B, float4 Matrix_C, int Operands);
+; float8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short8 Matrix_A, int8 Matrix_B, float8 Matrix_C, int Operands);
+;
+; half __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short Matrix_A, int8 Matrix_B, half Matrix_C, int Operands);
+; half2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short2 Matrix_A, int8 Matrix_B, half2 Matrix_C, int Operands);
+; half4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short4 Matrix_A, int8 Matrix_B, half4 Matrix_C, int Operands);
+; half8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short8 Matrix_A, int8 Matrix_B, half8 Matrix_C, int Operands);
+;
+; short __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short Matrix_A, int8 Matrix_B, short Matrix_C, int Operands);
+; short2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short2 Matrix_A, int8 Matrix_B, short2 Matrix_C, int Operands);
+; short4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short4 Matrix_A, int8 Matrix_B, short4 Matrix_C, int Operands);
+; short8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short8 Matrix_A, int8 Matrix_B, short8 Matrix_C, int Operands);
+;
+; float __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, float Matrix_A, float8 Matrix_B, float Matrix_C, int Operands);
+; float2 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, float2 Matrix_A, float8 Matrix_B, float2 Matrix_C, int Operands);
+; float4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, float4 Matrix_A, float8 Matrix_B, float4 Matrix_C, int Operands);
+; float8 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, float8 Matrix_A, float8 Matrix_B, float8 Matrix_C, int Operands);
+;
+; // no operands
+; float4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(int K_Dim, short4 Matrix_A, int8 Matrix_B, float4 Matrix_C);
+;
+; void foo(int iM, int2 iM2, int4 iM4, int8 iM8,
+; short sM, short2 sM2, short4 sM4, short8 sM8,
+; float fM, float2 fM2, float4 fM4, float8 fM8,
+; half hM, half2 hM2, half4 hM4, half8 hM8) {
+; const int i = 42;
+; int D = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM, iM8, iM, 0xA);
+; int2 D2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM2, iM8, iM2, 0xA);
+; int4 D4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM4, iM8, iM4, 0xA);
+; int8 D8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM8, iM8, iM8, 0xA);
+;
+; float fD = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM, iM8, fM, 0xA);
+; float2 fD2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM2, iM8, fM2, 0xA);
+; float4 fD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM4, iM8, fM4, 0xA);
+; float8 fD8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, iM8, iM8, fM8, 0xA);
+;
+; int sD = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM, iM8, iM, 0xA);
+; int2 sD2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM2, iM8, iM2, 0xA);
+; int4 sD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM4, iM8, iM4, 0xA);
+; int8 sD8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM8, iM8, iM8, 0xA);
+;
+; float sfD = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM, iM8, fM, 0xA);
+; float2 sfD2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM2, iM8, fM2, 0xA);
+; float4 sfD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM4, iM8, fM4, 0xA);
+; float8 sfD8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM8, iM8, fM8, 0xA);
+;
+; half hD = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM, iM8, hM, 0xA);
+; half2 hD2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM2, iM8, hM2, 0xA);
+; half4 hD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM4, iM8, hM4, 0xA);
+; half8 hD8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM8, iM8, hM8, 0xA);
+;
+; short ssD = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM, iM8, sM, 0xA);
+; short2 ssD2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM2, iM8, sM2, 0xA);
+; short4 ssD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM4, iM8, sM4, 0xA);
+; short8 ssD8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM8, iM8, sM8, 0xA);
+;
+; float ffD = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, fM, fM8, fM, 0xA);
+; float2 ffD2 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, fM2, fM8, fM2, 0xA);
+; float4 ffD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, fM4, fM8, fM4, 0xA);
+; float8 ffD8 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, fM8, fM8, fM8, 0xA);
+;
+; float4 noOpD4 = __spirv_SubgroupMatrixMultiplyAccumulateINTEL(i, sM4, iM8, fM4);
+; }
+; clang -cc1 -cl-std=clc++2021 -triple spir64-unknown-unknown -emit-llvm -finclude-default-header mma.cl -o tmp.ll
+
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: requires the following SPIR-V extension: SPV_INTEL_subgroup_matrix_multiply_accumulate
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate %s -o - -filetype=obj | spirv-val %}
+
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_subgroup_matrix_multiply_accumulate %s -o - -filetype=obj | spirv-val %}
+
+; CHECK: OpCapability SubgroupMatrixMultiplyAccumulateINTEL
+; CHECK: OpExtension "SPV_INTEL_subgroup_matrix_multiply_accumulate"
+; CHECK-DAG: %[[#Int32Ty:]] = OpTypeInt 32 0
+; CHECK-DAG: %[[#Int16Ty:]] = OpTypeInt 16 0
+; CHECK-DAG: %[[#Const42:]] = OpConstant %[[#Int32Ty]] 42
+; CHECK-DAG: %[[#VoidTy:]] = OpTypeVoid
+; CHECK-DAG: %[[#Vec2Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 2
+; CHECK-DAG: %[[#Vec4Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 4
+; CHECK-DAG: %[[#Vec8Int32Ty:]] = OpTypeVector %[[#Int32Ty]] 8
+; CHECK-DAG: %[[#Vec2Int16Ty:]] = OpTypeVector %[[#Int16Ty]] 2
+; CHECK-DAG: %[[#Vec4Int16Ty:]] = OpTypeVector %[[#Int16Ty]] 4
+; CHECK-DAG: %[[#Vec8Int16Ty:]] = OpTypeVector %[[#Int16Ty]] 8
+; CHECK-DAG: %[[#FloatTy:]] = OpTypeFloat 32
+; CHECK-DAG: %[[#Vec2FloatTy:]] = OpTypeVector %[[#FloatTy]] 2
+; CHECK-DAG: %[[#Vec4FloatTy:]] = OpTypeVector %[[#FloatTy]] 4
+; CHECK-DAG: %[[#Vec8FloatTy:]] = OpTypeVector %[[#FloatTy]] 8
+; CHECK-DAG: %[[#HalfTy:]] = OpTypeFloat 16
+; CHECK-DAG: %[[#Vec2HalfTy:]] = OpTypeVector %[[#HalfTy]] 2
+; CHECK-DAG: %[[#Vec4HalfTy:]] = OpTypeVector %[[#HalfTy]] 4
+; CHECK-DAG: %[[#Vec8HalfTy:]] = OpTypeVector %[[#HalfTy]] 8
+; CHECK: %[[#iM:]] = OpFunctionParameter %[[#Int32Ty]]
+; CHECK: %[[#iM2:]] = OpFunctionParameter %[[#Vec2Int32Ty]]
+; CHECK: %[[#iM4:]] = OpFunctionParameter %[[#Vec4Int32Ty]]
+; CHECK: %[[#iM8:]] = OpFunctionParameter %[[#Vec8Int32Ty]]
+; CHECK: %[[#sM:]] = OpFunctionParameter %[[#Int16Ty]]
+; CHECK: %[[#sM2:]] = OpFunctionParameter %[[#Vec2Int16Ty]]
+; CHECK: %[[#sM4:]] = OpFunctionParameter %[[#Vec4Int16Ty]]
+; CHECK: %[[#sM8:]] = OpFunctionParameter %[[#Vec8Int16Ty]]
+; CHECK: %[[#fM:]] = OpFunctionParameter %[[#FloatTy]]
+; CHECK: %[[#fM2:]] = OpFunctionParameter %[[#Vec2FloatTy]]
+; CHECK: %[[#fM4:]] = OpFunctionParameter %[[#Vec4FloatTy]]
+; CHECK: %[[#fM8:]] = OpFunctionParameter %[[#Vec8FloatTy]]
+; CHECK: %[[#hM:]] = OpFunctionParameter %[[#HalfTy]]
+; CHECK: %[[#hM2:]] = OpFunctionParameter %[[#Vec2HalfTy]]
+; CHECK: %[[#hM4:]] = OpFunctionParameter %[[#Vec4HalfTy]]
+; CHECK: %[[#hM8:]] = OpFunctionParameter %[[#Vec8HalfTy]]
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM]] %[[#iM8]] %[[#iM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM2]] %[[#iM8]] %[[#iM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM4]] %[[#iM8]] %[[#iM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM8]] %[[#iM8]] %[[#iM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM]] %[[#iM8]] %[[#fM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM2]] %[[#iM8]] %[[#fM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM4]] %[[#iM8]] %[[#fM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM8]] %[[#iM8]] %[[#fM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#iM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#iM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#iM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#iM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#fM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#fM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#fM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#fM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#hM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#hM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#hM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#hM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#sM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#sM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#sM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#sM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM]] %[[#fM8]] %[[#fM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM2]] %[[#fM8]] %[[#fM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM4]] %[[#fM8]] %[[#fM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM8]] %[[#fM8]] %[[#fM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#fM4]]
+
+define spir_func void @foo(i32 %iM, <2 x i32> %iM2, <4 x i32> %iM4, <8 x i32> %iM8,
+ i16 signext %sM, <2 x i16> %sM2, <4 x i16> %sM4, <8 x i16> %sM8,
+ float %fM, <2 x float> %fM2, <4 x float> %fM4, <8 x float> %fM8,
+ half %hM, <2 x half> %hM2, <4 x half> %hM4, <8 x half> %hM8) {
+entry:
+ %call = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiiDv8_iii(i32 42, i32 %iM, <8 x i32> %iM8, i32 %iM, i32 10)
+ %call1 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_iDv8_iS_i(i32 42, <2 x i32> %iM2, <8 x i32> %iM8, <2 x i32> %iM2, i32 10)
+ %call2 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_iDv8_iS_i(i32 42, <4 x i32> %iM4, <8 x i32> %iM8, <4 x i32> %iM4, i32 10)
+ %call3 = call spir_func <8 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_iS_S_i(i32 42, <8 x i32> %iM8, <8 x i32> %iM8, <8 x i32> %iM8, i32 10)
+ %call4 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiiDv8_ifi(i32 42, i32 %iM, <8 x i32> %iM8, float %fM, i32 10)
+ %call5 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_iDv8_iDv2_fi(i32 42, <2 x i32> %iM2, <8 x i32> %iM8, <2 x float> %fM2, i32 10)
+ %call6 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_iDv8_iDv4_fi(i32 42, <4 x i32> %iM4, <8 x i32> %iM8, <4 x float> %fM4, i32 10)
+ %call7 = call spir_func <8 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_iS_Dv8_fi(i32 42, <8 x i32> %iM8, <8 x i32> %iM8, <8 x float> %fM8, i32 10)
+ %call8 = call spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32 42, i16 signext %sM, <8 x i32> %iM8, i32 %iM, i32 10)
+ %call9 = call spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iDv2_ii(i32 42, <2 x i16> %sM2, <8 x i32> %iM8, <2 x i32> %iM2, i32 10)
+ %call10 = call spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_ii(i32 42, <4 x i16> %sM4, <8 x i32> %iM8, <4 x i32> %iM4, i32 10)
+ %call11 = call spir_func <8 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iS0_i(i32 42, <8 x i16> %sM8, <8 x i32> %iM8, <8 x i32> %iM8, i32 10)
+ %call12 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32 42, i16 signext %sM, <8 x i32> %iM8, float %fM, i32 10)
+ %call13 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iDv2_fi(i32 42, <2 x i16> %sM2, <8 x i32> %iM8, <2 x float> %fM2, i32 10)
+ %call14 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_fi(i32 42, <4 x i16> %sM4, <8 x i32> %iM8, <4 x float> %fM4, i32 10)
+ %call15 = call spir_func <8 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iDv8_fi(i32 42, <8 x i16> %sM8, <8 x i32> %iM8, <8 x float> %fM8, i32 10)
+ %call16 = call spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iDhi(i32 42, i16 signext %sM, <8 x i32> %iM8, half %hM, i32 10)
+ %call17 = call spir_func <2 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iDv2_Dhi(i32 42, <2 x i16> %sM2, <8 x i32> %iM8, <2 x half> %hM2, i32 10)
+ %call18 = call spir_func <4 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_Dhi(i32 42, <4 x i16> %sM4, <8 x i32> %iM8, <4 x half> %hM4, i32 10)
+ %call19 = call spir_func <8 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iDv8_Dhi(i32 42, <8 x i16> %sM8, <8 x i32> %iM8, <8 x half> %hM8, i32 10)
+ %call20 = call spir_func signext i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_isi(i32 42, i16 signext %sM, <8 x i32> %iM8, i16 signext %sM, i32 10)
+ %call21 = call spir_func <2 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iS_i(i32 42, <2 x i16> %sM2, <8 x i32> %iM8, <2 x i16> %sM2, i32 10)
+ %call22 = call spir_func <4 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iS_i(i32 42, <4 x i16> %sM4, <8 x i32> %iM8, <4 x i16> %sM4, i32 10)
+ %call23 = call spir_func <8 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iS_i(i32 42, <8 x i16> %sM8, <8 x i32> %iM8, <8 x i16> %sM8, i32 10)
+ %call24 = call spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv8_ffi(i32 42, float %fM, <8 x float> %fM8, float %fM, i32 10)
+ %call25 = call spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_fDv8_fS_i(i32 42, <2 x float> %fM2, <8 x float> %fM8, <2 x float> %fM2, i32 10)
+ %call26 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_fDv8_fS_i(i32 42, <4 x float> %fM4, <8 x float> %fM8, <4 x float> %fM4, i32 10)
+ %call27 = call spir_func <8 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_fS_S_i(i32 42, <8 x float> %fM8, <8 x float> %fM8, <8 x float> %fM8, i32 10)
+ %call28 = call spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_f(i32 42, <4 x i16> %sM4, <8 x i32> %iM8, <4 x float> %fM4)
+ ret void
+}
+
+declare spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiiDv8_iii(i32, i32, <8 x i32>, i32, i32)
+declare spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_iDv8_iS_i(i32, <2 x i32>, <8 x i32>, <2 x i32>, i32)
+declare spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_iDv8_iS_i(i32, <4 x i32>, <8 x i32>, <4 x i32>, i32)
+declare spir_func <8 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_iS_S_i(i32, <8 x i32>, <8 x i32>, <8 x i32>, i32)
+declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiiDv8_ifi(i32, i32, <8 x i32>, float, i32)
+declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_iDv8_iDv2_fi(i32, <2 x i32>, <8 x i32>, <2 x float>, i32)
+declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_iDv8_iDv4_fi(i32, <4 x i32>, <8 x i32>, <4 x float>, i32)
+declare spir_func <8 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_iS_Dv8_fi(i32, <8 x i32>, <8 x i32>, <8 x float>, i32)
+declare spir_func i32 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iii(i32, i16 signext, <8 x i32>, i32, i32)
+declare spir_func <2 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iDv2_ii(i32, <2 x i16>, <8 x i32>, <2 x i32>, i32)
+declare spir_func <4 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_ii(i32, <4 x i16>, <8 x i32>, <4 x i32>, i32)
+declare spir_func <8 x i32> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iS0_i(i32, <8 x i16>, <8 x i32>, <8 x i32>, i32)
+declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_ifi(i32, i16 signext, <8 x i32>, float, i32)
+declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iDv2_fi(i32, <2 x i16>, <8 x i32>, <2 x float>, i32)
+declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_fi(i32, <4 x i16>, <8 x i32>, <4 x float>, i32)
+declare spir_func <8 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iDv8_fi(i32, <8 x i16>, <8 x i32>, <8 x float>, i32)
+declare spir_func half @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_iDhi(i32, i16 signext, <8 x i32>, half, i32)
+declare spir_func <2 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iDv2_Dhi(i32, <2 x i16>, <8 x i32>, <2 x half>, i32)
+declare spir_func <4 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_Dhi(i32, <4 x i16>, <8 x i32>, <4 x half>, i32)
+declare spir_func <8 x half> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iDv8_Dhi(i32, <8 x i16>, <8 x i32>, <8 x half>, i32)
+declare spir_func signext i16 @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELisDv8_isi(i32, i16 signext, <8 x i32>, i16 signext, i32)
+declare spir_func <2 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_sDv8_iS_i(i32, <2 x i16>, <8 x i32>, <2 x i16>, i32)
+declare spir_func <4 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iS_i(i32, <4 x i16>, <8 x i32>, <4 x i16>, i32)
+declare spir_func <8 x i16> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_sDv8_iS_i(i32, <8 x i16>, <8 x i32>, <8 x i16>, i32)
+declare spir_func float @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELifDv8_ffi(i32, float, <8 x float>, float, i32)
+declare spir_func <2 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv2_fDv8_fS_i(i32, <2 x float>, <8 x float>, <2 x float>, i32)
+declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_fDv8_fS_i(i32, <4 x float>, <8 x float>, <4 x float>, i32)
+declare spir_func <8 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv8_fS_S_i(i32, <8 x float>, <8 x float>, <8 x float>, i32)
+declare spir_func <4 x float> @_Z45__spirv_SubgroupMatrixMultiplyAccumulateINTELiDv4_sDv8_iDv4_f(i32, <4 x i16>, <8 x i32>, <4 x float>)
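Note that the Operands literal is optional: the wrapper is registered with a minimum of 4 and a maximum of 5 arguments, and the follow-up patch below folds the fifth argument into an immediate only when it is present. A hedged OpenCL C sketch of the operand-less form, mirroring the noOpD4 case in the test above:

    // Assumes the same user-provided declaration style as in mma.cl.
    float4 __spirv_SubgroupMatrixMultiplyAccumulateINTEL(
        int K_Dim, short4 Matrix_A, int8 Matrix_B, float4 Matrix_C);

    float4 mma_no_operands(short4 a, int8 b, float4 c) {
      // With only four arguments, no trailing Operands literal is
      // appended to the emitted OpSubgroupMatrixMultiplyAccumulateINTEL.
      return __spirv_SubgroupMatrixMultiplyAccumulateINTEL(42, a, b, c);
    }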
From b76171ba808b67807a3e957223227ab5ec807f7e Mon Sep 17 00:00:00 2001
From: "Levytskyy, Vyacheslav" <vyacheslav.levytskyy at intel.com>
Date: Thu, 10 Apr 2025 10:52:08 -0700
Subject: [PATCH 2/2] lower the optional Operands argument as an immediate and update test checks
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 10 +++-
...roup_matrix_multiply_accumulate_generic.ll | 58 +++++++++----------
2 files changed, 37 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 16364ab30f280..e090fb67b3231 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -1161,9 +1161,15 @@ static bool generateGroupInst(const SPIRV::IncomingCall *Call,
MachineRegisterInfo *MRI = MIRBuilder.getMRI();
if (Call->isSpirvOp()) {
- if (GroupBuiltin->NoGroupOperation)
+ if (GroupBuiltin->NoGroupOperation) {
+ SmallVector<uint32_t, 1> ImmArgs;
+ if (GroupBuiltin->Opcode ==
+ SPIRV::OpSubgroupMatrixMultiplyAccumulateINTEL &&
+ Call->Arguments.size() > 4)
+ ImmArgs.push_back(getConstFromIntrinsic(Call->Arguments[4], MRI));
return buildOpFromWrapper(MIRBuilder, GroupBuiltin->Opcode, Call,
- GR->getSPIRVTypeID(Call->ReturnType));
+ GR->getSPIRVTypeID(Call->ReturnType), ImmArgs);
+ }
// Group Operation is a literal
Register GroupOpReg = Call->Arguments[1];
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll
index 492b3f0181fba..0cd6992936eeb 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_subgroup_matrix_multiply_accumulate/subgroup_matrix_multiply_accumulate_generic.ll
@@ -131,35 +131,35 @@
; CHECK: %[[#hM2:]] = OpFunctionParameter %[[#Vec2HalfTy]]
; CHECK: %[[#hM4:]] = OpFunctionParameter %[[#Vec4HalfTy]]
; CHECK: %[[#hM8:]] = OpFunctionParameter %[[#Vec8HalfTy]]
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM]] %[[#iM8]] %[[#iM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM2]] %[[#iM8]] %[[#iM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM4]] %[[#iM8]] %[[#iM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#iM8]] %[[#iM8]] %[[#iM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM]] %[[#iM8]] %[[#fM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM2]] %[[#iM8]] %[[#fM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM4]] %[[#iM8]] %[[#fM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#iM8]] %[[#iM8]] %[[#fM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#iM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#iM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#iM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int32Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#iM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#fM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#fM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#fM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#fM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#hM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#hM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#hM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8HalfTy]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#hM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#sM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#sM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#sM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int16Ty]] %[[#Id1:]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#sM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM]] %[[#fM8]] %[[#fM]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM2]] %[[#fM8]] %[[#fM2]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM4]] %[[#fM8]] %[[#fM4]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#fM8]] %[[#fM8]] %[[#fM8]] 10
-; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Id1:]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#fM4]]
+; CHECK: %[[#]] = OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int32Ty]] %[[#Const42]] %[[#iM]] %[[#iM8]] %[[#iM]] 10
+; CHECK: %[[#]] = OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int32Ty]] %[[#Const42]] %[[#iM2]] %[[#iM8]] %[[#iM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int32Ty]] %[[#Const42]] %[[#iM4]] %[[#iM8]] %[[#iM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int32Ty]] %[[#Const42]] %[[#iM8]] %[[#iM8]] %[[#iM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Const42]] %[[#iM]] %[[#iM8]] %[[#fM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Const42]] %[[#iM2]] %[[#iM8]] %[[#fM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Const42]] %[[#iM4]] %[[#iM8]] %[[#fM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Const42]] %[[#iM8]] %[[#iM8]] %[[#fM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int32Ty]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#iM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int32Ty]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#iM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int32Ty]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#iM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int32Ty]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#iM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#fM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#fM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#fM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#fM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#HalfTy]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#hM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2HalfTy]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#hM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4HalfTy]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#hM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8HalfTy]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#hM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Int16Ty]] %[[#Const42]] %[[#sM]] %[[#iM8]] %[[#sM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2Int16Ty]] %[[#Const42]] %[[#sM2]] %[[#iM8]] %[[#sM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4Int16Ty]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#sM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8Int16Ty]] %[[#Const42]] %[[#sM8]] %[[#iM8]] %[[#sM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#FloatTy]] %[[#Const42]] %[[#fM]] %[[#fM8]] %[[#fM]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec2FloatTy]] %[[#Const42]] %[[#fM2]] %[[#fM8]] %[[#fM2]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Const42]] %[[#fM4]] %[[#fM8]] %[[#fM4]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec8FloatTy]] %[[#Const42]] %[[#fM8]] %[[#fM8]] %[[#fM8]] 10
+; CHECK: OpSubgroupMatrixMultiplyAccumulateINTEL %[[#Vec4FloatTy]] %[[#Const42]] %[[#sM4]] %[[#iM8]] %[[#fM4]]
define spir_func void @foo(i32 %iM, <2 x i32> %iM2, <4 x i32> %iM4, <8 x i32> %iM8,
i16 signext %sM, <2 x i16> %sM2, <4 x i16> %sM4, <8 x i16> %sM8,