[Mlir-commits] [llvm] [mlir] [SPIR-V] Add support for the SPIR-V extension SPV_INTEL_tensor_float32_conversion (PR #150090)
llvmlistbot at llvm.org
llvmlistbot at llvm.org
Tue Jul 29 13:56:24 PDT 2025
https://github.com/YixingZhang007 updated https://github.com/llvm/llvm-project/pull/150090
>From 0f8308eac078448d42656eceab13f3702e934c14 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Tue, 22 Jul 2025 12:14:29 -0700
Subject: [PATCH 01/14] draft implementation for supporting
SPV_INTEL_tensor_float32_conversion
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 17 +++++++++++++--
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 21 ++++++++++++++++++-
llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 5 ++++-
llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 6 ++++++
.../lib/Target/SPIRV/SPIRVSymbolicOperands.td | 2 ++
5 files changed, 47 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 6ec7544767c52..1c7c1750af1c9 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -148,6 +148,7 @@ struct ConvertBuiltin {
bool IsSaturated;
bool IsRounded;
bool IsBfloat16;
+ bool IsTF32;
FPRoundingMode::FPRoundingMode RoundingMode;
};
@@ -2677,8 +2678,20 @@ static bool generateConvertInst(const StringRef DemangledCall,
}
} else if (GR->isScalarOrVectorOfType(Call->ReturnRegister,
SPIRV::OpTypeFloat)) {
- // Float -> Float
- Opcode = SPIRV::OpFConvert;
+ if(Builtin->IsTF32){
+ const auto *ST = static_cast<const SPIRVSubtarget *>(
+ &MIRBuilder.getMF().getSubtarget());
+ if (!ST->canUseExtension(
+ SPIRV::Extension::SPV_INTEL_bfloat16_conversion))
+ NeedExtMsg = "SPV_INTEL_bfloat16_conversion";
+ IsRightComponentsNumber =
+ GR->getScalarOrVectorComponentCount(Call->Arguments[0]) ==
+ GR->getScalarOrVectorComponentCount(Call->ReturnRegister);
+ Opcode = SPIRV::OpRoundFToTF32INTEL;
+ } else {
+ Float -> Float
+ Opcode = SPIRV::OpFConvert;
+ }
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index ea78dcd135267..326109c9fdff4 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1461,6 +1461,7 @@ class ConvertBuiltin<string name, InstructionSet set> {
bit IsRounded = !not(!eq(!find(name, "_rt"), -1));
bit IsBfloat16 = !or(!not(!eq(!find(name, "BF16"), -1)),
!not(!eq(!find(name, "bfloat16"), -1)));
+ bit IsTF32 = !not(!eq(!find(name, "TF32"), -1));
FPRoundingMode RoundingMode = !cond(!not(!eq(!find(name, "_rte"), -1)) : RTE,
!not(!eq(!find(name, "_rtz"), -1)) : RTZ,
!not(!eq(!find(name, "_rtp"), -1)) : RTP,
@@ -1472,7 +1473,7 @@ class ConvertBuiltin<string name, InstructionSet set> {
def ConvertBuiltins : GenericTable {
let FilterClass = "ConvertBuiltin";
let Fields = ["Name", "Set", "IsDestinationSigned", "IsSaturated",
- "IsRounded", "IsBfloat16", "RoundingMode"];
+ "IsRounded", "IsBfloat16", "IsTF32", "RoundingMode"];
string TypeOf_Set = "InstructionSet";
string TypeOf_RoundingMode = "FPRoundingMode";
}
@@ -1556,6 +1557,24 @@ foreach conv = ["FToBF16INTEL", "BF16ToFINTEL"] in {
def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
}
+// SPV_INTEL_tensor_float32_conversion
+// Multiclass used to define at the same time both a demangled builtin records
+// and a corresponding convert builtin records.
+multiclass DemangledTF32ConvertBuiltin<string name1, string name2> {
+ // Create records for scalar and vector conversions.
+ foreach i = ["", "2", "3", "4", "8", "16"] in {
+ def : DemangledBuiltin<!strconcat("intel_convert_", name1, i, name2, i), OpenCL_std, Convert, 1, 1>;
+ def : ConvertBuiltin<!strconcat("intel_convert_", name1, i, name2, i), OpenCL_std>;
+ }
+}
+
+defm : DemangledTF32ConvertBuiltin<"ConvertFToTF32INTEL">;
+
+foreach conv = ["FToTF32INTEL"] in {
+ def : DemangledBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std, Convert, 1, 1>;
+ def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
+}
+
//===----------------------------------------------------------------------===//
// Class defining a vector data load/store builtin record used for lowering
// into OpExtInst instruction.
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index 049ba0275f223..a04ed6a42c868 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -441,10 +441,13 @@ def OpBitcast : UnOp<"OpBitcast", 124>;
def OpPtrCastToCrossWorkgroupINTEL : UnOp<"OpPtrCastToCrossWorkgroupINTEL", 5934>;
def OpCrossWorkgroupCastToPtrINTEL : UnOp<"OpCrossWorkgroupCastToPtrINTEL", 5938>;
-// SPV_INTEL_bfloat16_conversion
+// SPV_INTEL_tensor_float32_conversion
def OpConvertFToBF16INTEL : UnOp<"OpConvertFToBF16INTEL", 6116>;
def OpConvertBF16ToFINTEL : UnOp<"OpConvertBF16ToFINTEL", 6117>;
+// SPV_INTEL_bfloat16_conversion
+def OpRoundFToTF32INTEL : UnOp<"OpRoundFToTF32INTEL", 6426>;
+
// 3.42.12 Composite Instructions
def OpVectorExtractDynamic: Op<77, (outs ID:$res), (ins TYPE:$type, vID:$vec, ID:$idx),
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index ad976e5288927..c252fc5897518 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1564,6 +1564,12 @@ void addInstrRequirements(const MachineInstr &MI,
Reqs.addCapability(SPIRV::Capability::BFloat16ConversionINTEL);
}
break;
+ case SPIRV::OpRoundFToTF32INTEL:
+ if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_tensor_float32_conversion)) {
+ Reqs.addExtension(SPIRV::Extension::SPV_INTEL_tensor_float32_conversion);
+ Reqs.addCapability(SPIRV::Capability::TF32ConversionINTEL);
+ }
+ break;
case SPIRV::OpVariableLengthArrayINTEL:
case SPIRV::OpSaveMemoryINTEL:
case SPIRV::OpRestoreMemoryINTEL:
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 548e9b717c161..7b2139a1c84a8 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -320,6 +320,7 @@ defm SPV_INTEL_subgroup_matrix_multiply_accumulate : ExtensionOperand<121>;
defm SPV_INTEL_2d_block_io : ExtensionOperand<122>;
defm SPV_INTEL_int4 : ExtensionOperand<123>;
defm SPV_KHR_float_controls2 : ExtensionOperand<124>;
+defm SPV_INTEL_tensor_float32_conversion : ExtensionOperand<125>;
//===----------------------------------------------------------------------===//
// Multiclass used to define Capabilities enum values and at the same time
@@ -502,6 +503,7 @@ defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variabl
defm GroupUniformArithmeticKHR : CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>;
defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>;
defm BFloat16ConversionINTEL : CapabilityOperand<6115, 0, 0, [SPV_INTEL_bfloat16_conversion], []>;
+defm TF32ConversionINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>;
defm GlobalVariableHostAccessINTEL : CapabilityOperand<6187, 0, 0, [SPV_INTEL_global_variable_host_access], []>;
defm HostAccessINTEL : CapabilityOperand<6188, 0, 0, [SPV_INTEL_global_variable_host_access], []>;
defm GlobalVariableFPGADecorationsINTEL : CapabilityOperand<6189, 0, 0, [SPV_INTEL_global_variable_fpga_decorations], []>;
>From 6c62b864a248cec084d464373dc08db9b0e68a14 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Tue, 22 Jul 2025 13:34:02 -0700
Subject: [PATCH 02/14] add a basic test
---
.../SPV_INTEL_tensor_float32_conversion/tf32-conv.ll | 11 +++++++++++
1 file changed, 11 insertions(+)
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
new file mode 100644
index 0000000000000..9864b80ecf2e0
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -0,0 +1,11 @@
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o -
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func void @test(<8 x i16> %in) {
+ %res = tail call spir_func <8 x i16> @_Z27__spirv_RoundFToTF32INTELDv8_s(<8 x i16> %in)
+ ret void
+}
+
+declare spir_func float @_Z27__spirv_RoundFToTF32INTELDv8_s(<8 x i16>)
>From a1fc74f5fc94961b3fa1ea8447a40f3b52ff5f7f Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 06:35:47 -0700
Subject: [PATCH 03/14] add test and support the capability
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 2 +-
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 18 ------------------
.../tf32-conv.ll | 2 +-
.../include/mlir/Dialect/SPIRV/IR/SPIRVBase.td | 18 ++++++++++++++----
4 files changed, 16 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 1c7c1750af1c9..8e8c34e2bd925 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -2689,7 +2689,7 @@ static bool generateConvertInst(const StringRef DemangledCall,
GR->getScalarOrVectorComponentCount(Call->ReturnRegister);
Opcode = SPIRV::OpRoundFToTF32INTEL;
} else {
- Float -> Float
+ // Float -> Float
Opcode = SPIRV::OpFConvert;
}
}
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 326109c9fdff4..8f96329329e17 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1557,24 +1557,6 @@ foreach conv = ["FToBF16INTEL", "BF16ToFINTEL"] in {
def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
}
-// SPV_INTEL_tensor_float32_conversion
-// Multiclass used to define at the same time both a demangled builtin records
-// and a corresponding convert builtin records.
-multiclass DemangledTF32ConvertBuiltin<string name1, string name2> {
- // Create records for scalar and vector conversions.
- foreach i = ["", "2", "3", "4", "8", "16"] in {
- def : DemangledBuiltin<!strconcat("intel_convert_", name1, i, name2, i), OpenCL_std, Convert, 1, 1>;
- def : ConvertBuiltin<!strconcat("intel_convert_", name1, i, name2, i), OpenCL_std>;
- }
-}
-
-defm : DemangledTF32ConvertBuiltin<"ConvertFToTF32INTEL">;
-
-foreach conv = ["FToTF32INTEL"] in {
- def : DemangledBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std, Convert, 1, 1>;
- def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
-}
-
//===----------------------------------------------------------------------===//
// Class defining a vector data load/store builtin record used for lowering
// into OpExtInst instruction.
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
index 9864b80ecf2e0..3329f3aace685 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -1,4 +1,4 @@
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o -
+; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - %.spvasm
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 90383265002a3..9ae04ae9d51ee 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -425,6 +425,8 @@ def SPV_NVX_multiview_per_view_attributes : I32EnumAttrCase<"SPV_NVX_multiview_p
def SPV_ARM_tensors : I32EnumAttrCase<"SPV_ARM_tensors", 6000>;
+def SPV_INTEL_tensor_float32_conversion : I32EnumAttrCase<"SPV_INTEL_tensor_float32_conversion", 6426>;
+
def SPIRV_ExtensionAttr :
SPIRV_I32EnumAttr<"Extension", "supported SPIR-V extensions", "ext", [
SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_device_group,
@@ -474,7 +476,8 @@ def SPIRV_ExtensionAttr :
SPV_NV_shader_image_footprint, SPV_NV_shader_sm_builtins,
SPV_NV_shader_subgroup_partitioned, SPV_NV_shading_rate,
SPV_NV_stereo_view_rendering, SPV_NV_viewport_array2, SPV_NV_bindless_texture,
- SPV_NV_ray_tracing_motion_blur, SPV_NVX_multiview_per_view_attributes
+ SPV_NV_ray_tracing_motion_blur, SPV_NVX_multiview_per_view_attributes,
+ SPV_INTEL_tensor_float32_conversion
]>;
//===----------------------------------------------------------------------===//
@@ -1465,6 +1468,12 @@ def SPIRV_C_Bfloat16ConversionINTEL : I32EnumAttrCase<"B
];
}
+def SPIRV_C_tensor_float32_conversion : I32EnumAttrCase<"SPIRV_C_tensor_float32_conversion", 6425> {
+ list<Availability> availability = [
+ Extension<[SPV_INTEL_tensor_float32_conversion]>
+ ];
+}
+
def SPIRV_C_CacheControlsINTEL : I32EnumAttrCase<"CacheControlsINTEL", 6441> {
list<Availability> availability = [
Extension<[SPV_INTEL_cache_controls]>
@@ -1565,7 +1574,7 @@ def SPIRV_CapabilityAttr :
SPIRV_C_UniformTexelBufferArrayNonUniformIndexing,
SPIRV_C_StorageTexelBufferArrayNonUniformIndexing,
SPIRV_C_ShaderViewportIndexLayerEXT, SPIRV_C_ShaderViewportMaskNV,
- SPIRV_C_ShaderStereoViewNV, SPIRV_C_Bfloat16ConversionINTEL,
+ SPIRV_C_ShaderStereoViewNV, SPIRV_C_Bfloat16ConversionINTEL, SPIRV_C_tensor_float32_conversion,
SPIRV_C_CacheControlsINTEL, SPIRV_C_BFloat16TypeKHR,
SPIRV_C_BFloat16DotProductKHR, SPIRV_C_BFloat16CooperativeMatrixKHR
]>;
@@ -4586,6 +4595,7 @@ def SPIRV_OC_OpControlBarrierArriveINTEL : I32EnumAttrCase<"OpControlBarrie
def SPIRV_OC_OpControlBarrierWaitINTEL : I32EnumAttrCase<"OpControlBarrierWaitINTEL", 6143>;
def SPIRV_OC_OpGroupIMulKHR : I32EnumAttrCase<"OpGroupIMulKHR", 6401>;
def SPIRV_OC_OpGroupFMulKHR : I32EnumAttrCase<"OpGroupFMulKHR", 6402>;
+def SPIRV_OC_OpRoundFToTF32INTEL : I32EnumAttrCase<"OpRoundFToTF32INTEL", 6426>;
def SPIRV_OpcodeAttr :
SPIRV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", "opcode", [
@@ -4688,9 +4698,9 @@ def SPIRV_OpcodeAttr :
SPIRV_OC_OpEmitMeshTasksEXT, SPIRV_OC_OpSetMeshOutputsEXT,
SPIRV_OC_OpSubgroupBlockReadINTEL, SPIRV_OC_OpSubgroupBlockWriteINTEL,
SPIRV_OC_OpAssumeTrueKHR, SPIRV_OC_OpAtomicFAddEXT,
- SPIRV_OC_OpConvertFToBF16INTEL, SPIRV_OC_OpConvertBF16ToFINTEL,
+ SPIRV_OC_OpConvertFToBF16INTEL, SPIRV_OC_OpConvertBF16ToFINTEL,
SPIRV_OC_OpControlBarrierArriveINTEL, SPIRV_OC_OpControlBarrierWaitINTEL,
- SPIRV_OC_OpGroupIMulKHR, SPIRV_OC_OpGroupFMulKHR
+ SPIRV_OC_OpGroupIMulKHR, SPIRV_OC_OpGroupFMulKHR, SPIRV_OC_OpRoundFToTF32INTEL
]>;
// End opcode section. Generated from SPIR-V spec; DO NOT MODIFY!
>From 6dfcd8ebfe6da1c72c8e2a351ddf9c15065e9392 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 08:53:33 -0700
Subject: [PATCH 04/14] support the mlir to spirv for this spirv instruction
---
.../mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td | 44 +++++++++++++++++++
mlir/lib/Dialect/SPIRV/IR/CastOps.cpp | 41 +++++++++++++++++
2 files changed, 85 insertions(+)
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td
index 82d26e365fb24..66f172135c958 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td
@@ -198,6 +198,50 @@ def SPIRV_INTELControlBarrierWaitOp
```
}];
}
+// -----
+
+def SPIRV_INTELRoundFToTF32Op : SPIRV_IntelVendorOp<"RoundFToTF32", []> {
+ let summary = "See extension SPV_INTEL_tensor_float32_conversion";
+
+ let description = [{
+ Convert value numerically from a 32-bit floating point type to TF32 with rounding to the nearest even.
+
+ Result Type must be a scalar or vector of 32-bit floating-point type.
+
+ Float Value must have the same type as Result Type.
+
+ Results are computed per component.
+
+ #### Example:
+
+ ```mlir
+ %1 = spirv.RoundFToTF32 %0 : f32 to f32
+ %3 = spirv.RoundFToTF32 %2 : vector<3xf32> to vector<3xf32>
+ ```
+
+ }];
+
+
+ let availability = [
+ MinVersion<SPIRV_V_1_0>,
+ MaxVersion<SPIRV_V_1_6>,
+ Extension<[SPV_INTEL_tensor_float32_conversion]>,
+ Capability<[SPIRV_C_tensor_float32_conversion]>
+ ];
+
+ let arguments = (ins
+ SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$operand
+ );
+
+ let results = (outs
+ SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$result
+ );
+ let assemblyFormat = [{
+ $operand attr-dict `:` type($operand) `to` type($result)
+ }];
+
+ let hasVerifier = 1;
+}
// -----
diff --git a/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp b/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
index e27dc274673be..170b501daa080 100644
--- a/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
@@ -311,6 +311,47 @@ LogicalResult INTELConvertFToBF16Op::verify() {
return success();
}
+//===----------------------------------------------------------------------===//
+// spirv.INTELConvertFToBF16Op
+//===----------------------------------------------------------------------===//
+
+LogicalResult INTELConvertFToBF16Op::verify() {
+ auto operandType = getOperand().getType();
+ auto resultType = getResult().getType();
+ // ODS checks that vector result type and vector operand type have the same
+ // shape.
+ if (auto vectorType = llvm::dyn_cast<VectorType>(operandType)) {
+ unsigned operandNumElements = vectorType.getNumElements();
+ unsigned resultNumElements =
+ llvm::cast<VectorType>(resultType).getNumElements();
+ if (operandNumElements != resultNumElements) {
+ return emitOpError(
+ "operand and result must have same number of elements");
+ }
+ }
+ return success();
+}
+
+//===----------------------------------------------------------------------===//
+// spirv.INTELRoundFToTF32Op
+//===----------------------------------------------------------------------===//
+
+LogicalResult INTELRoundFToTF32Op::verify() {
+ auto operandType = getOperand().getType();
+ auto resultType = getResult().getType();
+ // ODS checks that vector result type and vector operand type have the same
+ // shape.
+ if (auto vectorType = llvm::dyn_cast<VectorType>(operandType)) {
+ unsigned operandNumElements = vectorType.getNumElements();
+ unsigned resultNumElements =
+ llvm::cast<VectorType>(resultType).getNumElements();
+ if (operandNumElements != resultNumElements) {
+ return emitOpError(
+ "operand and result must have same number of elements");
+ }
+ }
+ return success();
+}
//===----------------------------------------------------------------------===//
// spirv.FConvertOp
//===----------------------------------------------------------------------===//
>From 6f61755daf19e35cf280161c4d19f361ef0a3146 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 11:37:49 -0700
Subject: [PATCH 05/14] mlir test pass
---
.../mlir/Dialect/SPIRV/IR/SPIRVBase.td | 17 ++--
.../mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td | 88 +++++++++----------
mlir/lib/Dialect/SPIRV/IR/CastOps.cpp | 22 +----
mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir | 36 ++++++++
4 files changed, 90 insertions(+), 73 deletions(-)
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
index 9ae04ae9d51ee..9c9eefd054fa6 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVBase.td
@@ -405,6 +405,7 @@ def SPV_INTEL_memory_access_aliasing : I32EnumAttrCase<"SPV_INTEL_me
def SPV_INTEL_split_barrier : I32EnumAttrCase<"SPV_INTEL_split_barrier", 4029>;
def SPV_INTEL_bfloat16_conversion : I32EnumAttrCase<"SPV_INTEL_bfloat16_conversion", 4031>;
def SPV_INTEL_cache_controls : I32EnumAttrCase<"SPV_INTEL_cache_controls", 4032>;
+def SPV_INTEL_tensor_float32_conversion : I32EnumAttrCase<"SPV_INTEL_tensor_float32_conversion", 4033>;
def SPV_NV_compute_shader_derivatives : I32EnumAttrCase<"SPV_NV_compute_shader_derivatives", 5000>;
def SPV_NV_cooperative_matrix : I32EnumAttrCase<"SPV_NV_cooperative_matrix", 5001>;
@@ -425,8 +426,6 @@ def SPV_NVX_multiview_per_view_attributes : I32EnumAttrCase<"SPV_NVX_multiview_p
def SPV_ARM_tensors : I32EnumAttrCase<"SPV_ARM_tensors", 6000>;
-def SPV_INTEL_tensor_float32_conversion : I32EnumAttrCase<"SPV_INTEL_tensor_float32_conversion", 6426>;
-
def SPIRV_ExtensionAttr :
SPIRV_I32EnumAttr<"Extension", "supported SPIR-V extensions", "ext", [
SPV_KHR_16bit_storage, SPV_KHR_8bit_storage, SPV_KHR_device_group,
@@ -1468,7 +1467,7 @@ def SPIRV_C_Bfloat16ConversionINTEL : I32EnumAttrCase<"B
];
}
-def SPIRV_C_tensor_float32_conversion : I32EnumAttrCase<"SPIRV_C_tensor_float32_conversion", 6425> {
+def SPIRV_C_TensorFloat32RoundingINTEL : I32EnumAttrCase<"TensorFloat32RoundingINTEL", 6425> {
list<Availability> availability = [
Extension<[SPV_INTEL_tensor_float32_conversion]>
];
@@ -1574,9 +1573,10 @@ def SPIRV_CapabilityAttr :
SPIRV_C_UniformTexelBufferArrayNonUniformIndexing,
SPIRV_C_StorageTexelBufferArrayNonUniformIndexing,
SPIRV_C_ShaderViewportIndexLayerEXT, SPIRV_C_ShaderViewportMaskNV,
- SPIRV_C_ShaderStereoViewNV, SPIRV_C_Bfloat16ConversionINTEL, SPIRV_C_tensor_float32_conversion,
+ SPIRV_C_ShaderStereoViewNV, SPIRV_C_Bfloat16ConversionINTEL,
SPIRV_C_CacheControlsINTEL, SPIRV_C_BFloat16TypeKHR,
- SPIRV_C_BFloat16DotProductKHR, SPIRV_C_BFloat16CooperativeMatrixKHR
+ SPIRV_C_BFloat16DotProductKHR, SPIRV_C_BFloat16CooperativeMatrixKHR,
+ SPIRV_C_TensorFloat32RoundingINTEL
]>;
def SPIRV_AM_Logical : I32EnumAttrCase<"Logical", 0>;
@@ -4595,7 +4595,7 @@ def SPIRV_OC_OpControlBarrierArriveINTEL : I32EnumAttrCase<"OpControlBarrie
def SPIRV_OC_OpControlBarrierWaitINTEL : I32EnumAttrCase<"OpControlBarrierWaitINTEL", 6143>;
def SPIRV_OC_OpGroupIMulKHR : I32EnumAttrCase<"OpGroupIMulKHR", 6401>;
def SPIRV_OC_OpGroupFMulKHR : I32EnumAttrCase<"OpGroupFMulKHR", 6402>;
-def SPIRV_OC_OpRoundFToTF32INTEL : I32EnumAttrCase<"OpRoundFToTF32INTEL", 6426>;
+def SPIRV_OC_OpRoundFToTF32INTEL : I32EnumAttrCase<"OpRoundFToTF32INTEL", 6426>;
def SPIRV_OpcodeAttr :
SPIRV_I32EnumAttr<"Opcode", "valid SPIR-V instructions", "opcode", [
@@ -4698,9 +4698,10 @@ def SPIRV_OpcodeAttr :
SPIRV_OC_OpEmitMeshTasksEXT, SPIRV_OC_OpSetMeshOutputsEXT,
SPIRV_OC_OpSubgroupBlockReadINTEL, SPIRV_OC_OpSubgroupBlockWriteINTEL,
SPIRV_OC_OpAssumeTrueKHR, SPIRV_OC_OpAtomicFAddEXT,
- SPIRV_OC_OpConvertFToBF16INTEL, SPIRV_OC_OpConvertBF16ToFINTEL,
+ SPIRV_OC_OpConvertFToBF16INTEL, SPIRV_OC_OpConvertBF16ToFINTEL,
SPIRV_OC_OpControlBarrierArriveINTEL, SPIRV_OC_OpControlBarrierWaitINTEL,
- SPIRV_OC_OpGroupIMulKHR, SPIRV_OC_OpGroupFMulKHR, SPIRV_OC_OpRoundFToTF32INTEL
+ SPIRV_OC_OpGroupIMulKHR, SPIRV_OC_OpGroupFMulKHR,
+ SPIRV_OC_OpRoundFToTF32INTEL
]>;
// End opcode section. Generated from SPIR-V spec; DO NOT MODIFY!
diff --git a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td
index 66f172135c958..6828d9092feae 100644
--- a/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td
+++ b/mlir/include/mlir/Dialect/SPIRV/IR/SPIRVIntelExtOps.td
@@ -110,6 +110,50 @@ def SPIRV_INTELConvertBF16ToFOp : SPIRV_IntelVendorOp<"ConvertBF16ToF", []> {
let hasVerifier = 1;
}
+// -----
+
+def SPIRV_INTELRoundFToTF32Op : SPIRV_IntelVendorOp<"RoundFToTF32", []> {
+ let summary = "See extension SPV_INTEL_tensor_float32_conversion";
+
+ let description = [{
+ Convert value numerically from a 32-bit floating point type to TF32 with rounding to the nearest even.
+
+ Result Type must be a scalar or vector of 32-bit floating-point type.
+
+ Float Value must have the same type as Result Type.
+
+ Results are computed per component.
+
+ #### Example:
+
+ ```mlir
+ %1 = spirv.RoundFToTF32 %0 : f32 to f32
+ %3 = spirv.RoundFToTF32 %2 : vector<3xf32> to vector<3xf32>
+ ```
+
+ }];
+
+
+ let availability = [
+ MinVersion<SPIRV_V_1_0>,
+ MaxVersion<SPIRV_V_1_6>,
+ Extension<[SPV_INTEL_tensor_float32_conversion]>,
+ Capability<[SPIRV_C_TensorFloat32RoundingINTEL]>
+ ];
+
+ let arguments = (ins
+ SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$operand
+ );
+
+ let results = (outs
+ SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$result
+ );
+ let assemblyFormat = [{
+ $operand attr-dict `:` type($operand) `to` type($result)
+ }];
+
+ let hasVerifier = 1;
+}
// -----
@@ -198,50 +242,6 @@ def SPIRV_INTELControlBarrierWaitOp
```
}];
}
-// -----
-
-def SPIRV_INTELRoundFToTF32Op : SPIRV_IntelVendorOp<"RoundFToTF32", []> {
- let summary = "See extension SPV_INTEL_tensor_float32_conversion";
-
- let description = [{
- Convert value numerically from a 32-bit floating point type to TF32 with rounding to the nearest even.
-
- Result Type must be a scalar or vector of 32-bit floating-point type.
-
- Float Value must have the same type as Result Type.
-
- Results are computed per component.
-
- #### Example:
-
- ```mlir
- %1 = spirv.RoundFToTF32 %0 : f32 to f32
- %3 = spirv.RoundFToTF32 %2 : vector<3xf32> to vector<3xf32>
- ```
-
- }];
-
-
- let availability = [
- MinVersion<SPIRV_V_1_0>,
- MaxVersion<SPIRV_V_1_6>,
- Extension<[SPV_INTEL_tensor_float32_conversion]>,
- Capability<[SPIRV_C_tensor_float32_conversion]>
- ];
-
- let arguments = (ins
- SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$operand
- );
-
- let results = (outs
- SPIRV_ScalarOrVectorOf<SPIRV_Float32>:$result
- );
- let assemblyFormat = [{
- $operand attr-dict `:` type($operand) `to` type($result)
- }];
-
- let hasVerifier = 1;
-}
// -----
diff --git a/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp b/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
index 170b501daa080..fc3e7308356bf 100644
--- a/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
+++ b/mlir/lib/Dialect/SPIRV/IR/CastOps.cpp
@@ -311,27 +311,6 @@ LogicalResult INTELConvertFToBF16Op::verify() {
return success();
}
-//===----------------------------------------------------------------------===//
-// spirv.INTELConvertFToBF16Op
-//===----------------------------------------------------------------------===//
-
-LogicalResult INTELConvertFToBF16Op::verify() {
- auto operandType = getOperand().getType();
- auto resultType = getResult().getType();
- // ODS checks that vector result type and vector operand type have the same
- // shape.
- if (auto vectorType = llvm::dyn_cast<VectorType>(operandType)) {
- unsigned operandNumElements = vectorType.getNumElements();
- unsigned resultNumElements =
- llvm::cast<VectorType>(resultType).getNumElements();
- if (operandNumElements != resultNumElements) {
- return emitOpError(
- "operand and result must have same number of elements");
- }
- }
- return success();
-}
-
//===----------------------------------------------------------------------===//
// spirv.INTELRoundFToTF32Op
//===----------------------------------------------------------------------===//
@@ -352,6 +331,7 @@ LogicalResult INTELRoundFToTF32Op::verify() {
}
return success();
}
+
//===----------------------------------------------------------------------===//
// spirv.FConvertOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir b/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir
index bb15d018a6c44..aa5bee5796cfa 100644
--- a/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir
+++ b/mlir/test/Dialect/SPIRV/IR/intel-ext-ops.mlir
@@ -72,6 +72,42 @@ spirv.func @bf16_to_f32_vec_unsupported(%arg0 : vector<2xi16>) "None" {
// -----
+//===----------------------------------------------------------------------===//
+// spirv.INTEL.RoundFToTF32
+//===----------------------------------------------------------------------===//
+
+spirv.func @f32_to_tf32(%arg0 : f32) "None" {
+ // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : f32 to f32
+ %0 = spirv.INTEL.RoundFToTF32 %arg0 : f32 to f32
+ spirv.Return
+}
+
+// -----
+
+spirv.func @f32_to_tf32_vec(%arg0 : vector<2xf32>) "None" {
+ // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : vector<2xf32> to vector<2xf32>
+ %0 = spirv.INTEL.RoundFToTF32 %arg0 : vector<2xf32> to vector<2xf32>
+ spirv.Return
+}
+
+// -----
+
+spirv.func @f32_to_tf32_unsupported(%arg0 : f64) "None" {
+ // expected-error @+1 {{operand #0 must be Float32 or vector of Float32 values of length 2/3/4/8/16, but got}}
+ %0 = spirv.INTEL.RoundFToTF32 %arg0 : f64 to f32
+ spirv.Return
+}
+
+// -----
+
+spirv.func @f32_to_tf32_vec_unsupported(%arg0 : vector<2xf32>) "None" {
+ // expected-error @+1 {{operand and result must have same number of elements}}
+ %0 = spirv.INTEL.RoundFToTF32 %arg0 : vector<2xf32> to vector<4xf32>
+ spirv.Return
+}
+
+// -----
+
//===----------------------------------------------------------------------===//
// spirv.INTEL.SplitBarrier
//===----------------------------------------------------------------------===//
>From 566047bf63b14029c4d5b9e04aa604a33dbe5279 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 12:39:57 -0700
Subject: [PATCH 06/14] give mlir target test
---
mlir/test/Target/SPIRV/intel-ext-ops.mlir | 22 ++++++++++++++++++++++
1 file changed, 22 insertions(+)
diff --git a/mlir/test/Target/SPIRV/intel-ext-ops.mlir b/mlir/test/Target/SPIRV/intel-ext-ops.mlir
index 6d2fd324363c6..53cf8bf8fbd62 100644
--- a/mlir/test/Target/SPIRV/intel-ext-ops.mlir
+++ b/mlir/test/Target/SPIRV/intel-ext-ops.mlir
@@ -32,6 +32,28 @@ spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [Bfloat16ConversionINTEL]
// -----
+//===----------------------------------------------------------------------===//
+// spirv.INTEL.RoundFToTF32
+//===----------------------------------------------------------------------===//
+
+spirv.module Logical GLSL450 requires #spirv.vce<v1.0, [TensorFloat32RoundingINTEL], [SPV_INTEL_tensor_float32_conversion]> {
+ // CHECK-LABEL: @f32_to_tf32
+ spirv.func @f32_to_tf32(%arg0 : f32) "None" {
+ // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : f32 to f32
+ %1 = spirv.INTEL.RoundFToTF32 %arg0 : f32 to f32
+ spirv.Return
+ }
+
+ // CHECK-LABEL: @f32_to_tf32_vec
+ spirv.func @f32_to_tf32_vec(%arg0 : vector<2xf32>) "None" {
+ // CHECK: {{%.*}} = spirv.INTEL.RoundFToTF32 {{%.*}} : vector<2xf32> to vector<2xf32>
+ %1 = spirv.INTEL.RoundFToTF32 %arg0 : vector<2xf32> to vector<2xf32>
+ spirv.Return
+ }
+}
+
+// -----
+
//===----------------------------------------------------------------------===//
// spirv.INTEL.SplitBarrier
//===----------------------------------------------------------------------===//
>From 7280914a546032561b21b39e6a258a025b0810ce Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 14:17:28 -0700
Subject: [PATCH 07/14] code clean up, fix small mistake
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index 8e8c34e2bd925..e167770db1b2e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -2682,8 +2682,8 @@ static bool generateConvertInst(const StringRef DemangledCall,
const auto *ST = static_cast<const SPIRVSubtarget *>(
&MIRBuilder.getMF().getSubtarget());
if (!ST->canUseExtension(
- SPIRV::Extension::SPV_INTEL_bfloat16_conversion))
- NeedExtMsg = "SPV_INTEL_bfloat16_conversion";
+ SPIRV::Extension::SPV_INTEL_tensor_float32_conversion))
+ NeedExtMsg = "SPV_INTEL_tensor_float32_conversion";
IsRightComponentsNumber =
GR->getScalarOrVectorComponentCount(Call->Arguments[0]) ==
GR->getScalarOrVectorComponentCount(Call->ReturnRegister);
>From 1749973c60a35abd2b7fa984f5c376790bcb8cf8 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 14:31:12 -0700
Subject: [PATCH 08/14] fix small issue
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 18 ++++++++++++++++++
llvm/lib/Target/SPIRV/SPIRVInstrInfo.td | 4 ++--
llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp | 2 +-
llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td | 2 +-
4 files changed, 22 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 8f96329329e17..5836a6491e17a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1557,6 +1557,24 @@ foreach conv = ["FToBF16INTEL", "BF16ToFINTEL"] in {
def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
}
+// SPV_INTEL_tensor_float32_conversion
+multiclass DemangledTF32RoundBuiltin<string name1, string name2> {
+ // Create records for scalar and vector conversions.
+ foreach i = ["", "2", "3", "4", "8", "16"] in {
+ def : DemangledBuiltin<!strconcat("intel_round_", name1, i, name2, i), OpenCL_std, Convert, 1, 1>;
+ def : ConvertBuiltin<!strconcat("intel_round_", name1, i, name2, i), OpenCL_std>;
+ }
+}
+
+defm : DemangledFToTF32RoundBuiltin<"tensor_float32", "_as_float">;
+defm : DemangledFToTF32RoundBuiltin<"as_tensor_float32", "_float">;
+
+foreach conv = ["FToTF32INTEL"] in {
+ def : DemangledBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std, Convert, 1, 1>;
+ def : ConvertBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std>;
+}
+
+
//===----------------------------------------------------------------------===//
// Class defining a vector data load/store builtin record used for lowering
// into OpExtInst instruction.
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
index a04ed6a42c868..f0b938d681dba 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
+++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td
@@ -441,11 +441,11 @@ def OpBitcast : UnOp<"OpBitcast", 124>;
def OpPtrCastToCrossWorkgroupINTEL : UnOp<"OpPtrCastToCrossWorkgroupINTEL", 5934>;
def OpCrossWorkgroupCastToPtrINTEL : UnOp<"OpCrossWorkgroupCastToPtrINTEL", 5938>;
-// SPV_INTEL_tensor_float32_conversion
+// SPV_INTEL_bfloat16_conversion
def OpConvertFToBF16INTEL : UnOp<"OpConvertFToBF16INTEL", 6116>;
def OpConvertBF16ToFINTEL : UnOp<"OpConvertBF16ToFINTEL", 6117>;
-// SPV_INTEL_bfloat16_conversion
+// SPV_INTEL_tensor_float32_conversion
def OpRoundFToTF32INTEL : UnOp<"OpRoundFToTF32INTEL", 6426>;
// 3.42.12 Composite Instructions
diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
index c252fc5897518..eac337c3c4246 100644
--- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp
@@ -1567,7 +1567,7 @@ void addInstrRequirements(const MachineInstr &MI,
case SPIRV::OpRoundFToTF32INTEL:
if (ST.canUseExtension(SPIRV::Extension::SPV_INTEL_tensor_float32_conversion)) {
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_tensor_float32_conversion);
- Reqs.addCapability(SPIRV::Capability::TF32ConversionINTEL);
+ Reqs.addCapability(SPIRV::Capability::TensorFloat32RoundingINTEL);
}
break;
case SPIRV::OpVariableLengthArrayINTEL:
diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
index 7b2139a1c84a8..e94c364fc8443 100644
--- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
+++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td
@@ -503,7 +503,7 @@ defm VariableLengthArrayINTEL : CapabilityOperand<5817, 0, 0, [SPV_INTEL_variabl
defm GroupUniformArithmeticKHR : CapabilityOperand<6400, 0, 0, [SPV_KHR_uniform_group_instructions], []>;
defm USMStorageClassesINTEL : CapabilityOperand<5935, 0, 0, [SPV_INTEL_usm_storage_classes], [Kernel]>;
defm BFloat16ConversionINTEL : CapabilityOperand<6115, 0, 0, [SPV_INTEL_bfloat16_conversion], []>;
-defm TF32ConversionINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>;
+defm TensorFloat32RoundingINTEL : CapabilityOperand<6425, 0, 0, [SPV_INTEL_tensor_float32_conversion], []>;
defm GlobalVariableHostAccessINTEL : CapabilityOperand<6187, 0, 0, [SPV_INTEL_global_variable_host_access], []>;
defm HostAccessINTEL : CapabilityOperand<6188, 0, 0, [SPV_INTEL_global_variable_host_access], []>;
defm GlobalVariableFPGADecorationsINTEL : CapabilityOperand<6189, 0, 0, [SPV_INTEL_global_variable_fpga_decorations], []>;
>From 1693a94faec8ec869ad6d837689b132134d55914 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Wed, 23 Jul 2025 14:49:33 -0700
Subject: [PATCH 09/14] small fix in SPIRVBuiltins.td
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 5836a6491e17a..00bbe56786f5b 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1566,8 +1566,8 @@ multiclass DemangledTF32RoundBuiltin<string name1, string name2> {
}
}
-defm : DemangledFToTF32RoundBuiltin<"tensor_float32", "_as_float">;
-defm : DemangledFToTF32RoundBuiltin<"as_tensor_float32", "_float">;
+defm : DemangledTF32RoundBuiltin<"tensor_float32", "_as_float">;
+defm : DemangledTF32RoundBuiltin<"as_tensor_float32", "_float">;
foreach conv = ["FToTF32INTEL"] in {
def : DemangledBuiltin<!strconcat("__spirv_Round", conv), OpenCL_std, Convert, 1, 1>;
>From a443d4ef08d6765fa14e1c5b0dda593ba92e21ff Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Mon, 28 Jul 2025 07:09:40 -0700
Subject: [PATCH 10/14] add llvm ir tests
---
.../tf32-conv-negative1.ll | 12 ++++++++++++
.../tf32-conv-negative2.ll | 12 ++++++++++++
.../SPV_INTEL_tensor_float32_conversion/tf32-conv.ll | 9 ++++++---
3 files changed, 30 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
create mode 100644 llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
new file mode 100644
index 0000000000000..92843abf1f67a
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: result and argument must have the same number of components
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func void @test(<8 x float> %in) {
+ %res = tail call spir_func i16 @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ ret void
+}
+
+declare spir_func float @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
new file mode 100644
index 0000000000000..dc6cd0a9c1896
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
@@ -0,0 +1,12 @@
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: result and argument must have the same number of components
+
+target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
+target triple = "spir64-unknown-unknown"
+
+define spir_func void @test(<8 x float> %in) {
+ %res = tail call spir_func <4 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ ret void
+}
+
+declare spir_func <4 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
index 3329f3aace685..e9194a1c79e05 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -3,9 +3,12 @@
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
-define spir_func void @test(<8 x i16> %in) {
- %res = tail call spir_func <8 x i16> @_Z27__spirv_RoundFToTF32INTELDv8_s(<8 x i16> %in)
+define spir_func void @test(float %in) {
+ %res1 = tail call spir_func zeroext float @_Z27__spirv_RoundFToTF32INTELf(float %a)
+ %res3 = tail call spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ %res5 = tail call spir_func zeroext float @_Z27__spirv_RoundFToTF32INTELf(float 1.500000e+00)
ret void
}
-declare spir_func float @_Z27__spirv_RoundFToTF32INTELDv8_s(<8 x i16>)
+declare spir_func zeroext float @_Z27__spirv_RoundFToTF32INTELf(float)
+declare spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
>From efc026f82d754c7e9bb35a163d89fcaae40e8e4f Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Mon, 28 Jul 2025 11:58:32 -0700
Subject: [PATCH 11/14] fix some small issue
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.td | 7 +++++--
llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp | 4 +++-
.../SPV_INTEL_tensor_float32_conversion/tf32-conv.ll | 10 ++++++----
3 files changed, 14 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
index 00bbe56786f5b..963746ab4771a 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td
@@ -1461,7 +1461,8 @@ class ConvertBuiltin<string name, InstructionSet set> {
bit IsRounded = !not(!eq(!find(name, "_rt"), -1));
bit IsBfloat16 = !or(!not(!eq(!find(name, "BF16"), -1)),
!not(!eq(!find(name, "bfloat16"), -1)));
- bit IsTF32 = !not(!eq(!find(name, "TF32"), -1));
+ bit IsTF32 = !or(!not(!eq(!find(name, "TF32"), -1)),
+ !not(!eq(!find(name, "tensor_float32"), -1)));
FPRoundingMode RoundingMode = !cond(!not(!eq(!find(name, "_rte"), -1)) : RTE,
!not(!eq(!find(name, "_rtz"), -1)) : RTZ,
!not(!eq(!find(name, "_rtp"), -1)) : RTP,
@@ -1557,7 +1558,9 @@ foreach conv = ["FToBF16INTEL", "BF16ToFINTEL"] in {
def : ConvertBuiltin<!strconcat("__spirv_Convert", conv), OpenCL_std>;
}
-// SPV_INTEL_tensor_float32_conversion
+// cl_intel_tensor_float32_conversions SPV_INTEL_tensor_float32_conversion
+// Multiclass used to define at the same time both a demangled builtin records
+// and a corresponding convert builtin records.
multiclass DemangledTF32RoundBuiltin<string name1, string name2> {
// Create records for scalar and vector conversions.
foreach i = ["", "2", "3", "4", "8", "16"] in {
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
index 2726203d253ad..945d3febe0bcf 100644
--- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp
@@ -102,7 +102,9 @@ static const std::map<std::string, SPIRV::Extension::Extension, std::less<>>
SPIRV::Extension::Extension::SPV_INTEL_2d_block_io},
{"SPV_INTEL_int4", SPIRV::Extension::Extension::SPV_INTEL_int4},
{"SPV_KHR_float_controls2",
- SPIRV::Extension::Extension::SPV_KHR_float_controls2}};
+ SPIRV::Extension::Extension::SPV_KHR_float_controls2},
+ {"SPV_INTEL_tensor_float32_conversion",
+ SPIRV::Extension::Extension::SPV_INTEL_tensor_float32_conversion}};
bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName,
StringRef ArgValue,
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
index e9194a1c79e05..3c1bd74e17c30 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -1,14 +1,16 @@
; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - %.spvasm
+; CHECK: OpCapability BFloat16ConversionINTEL
+
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
-define spir_func void @test(float %in) {
- %res1 = tail call spir_func zeroext float @_Z27__spirv_RoundFToTF32INTELf(float %a)
+define spir_func void @test(float %a, <8 x float> %in) {
+ %res1 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float %a)
%res3 = tail call spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
- %res5 = tail call spir_func zeroext float @_Z27__spirv_RoundFToTF32INTELf(float 1.500000e+00)
+ %res5 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float 1.500000e+00)
ret void
}
-declare spir_func zeroext float @_Z27__spirv_RoundFToTF32INTELf(float)
+declare spir_func float @_Z27__spirv_RoundFToTF32INTELf(float)
declare spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
>From c7cafa3cf93d322b6e1322da502453c694973781 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Mon, 28 Jul 2025 12:48:35 -0700
Subject: [PATCH 12/14] update the test
---
.../tf32-conv.ll | 24 ++++++++++++++++---
1 file changed, 21 insertions(+), 3 deletions(-)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
index 3c1bd74e17c30..ff85f744aaf89 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -7,10 +7,28 @@ target triple = "spir64-unknown-unknown"
define spir_func void @test(float %a, <8 x float> %in) {
%res1 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float %a)
- %res3 = tail call spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
- %res5 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float 1.500000e+00)
+ %res2 = tail call spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ %res3 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float 1.500000e+00)
ret void
}
declare spir_func float @_Z27__spirv_RoundFToTF32INTELf(float)
-declare spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
+declare spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
+
+define dso_local spir_kernel void @test_ocl(float %a) {
+entry:
+ %res4 = call spir_func float @_Z31intel_round_as_tensor_float32_floatt(float 0.000000e+00)
+ %res5 = call spir_func <2 x float> @_Z33intel_round_as_tensor_float322__float2Dv2_t(<2 x float> zeroinitializer)
+ %res6 = call spir_func <3 x float> @_Z33intel_round_as_tensor_float323_float3Dv3_t(<3 x float> zeroinitializer)
+ %res7 = call spir_func <4 x float> @_Z33intel_round_as_tensor_float324_float4Dv4_t(<4 x float> zeroinitializer)
+ %res8 = call spir_func <8 x float> @_Z33intel_round_as_tensor_float328_float8Dv8_t(<8 x float> zeroinitializer)
+ %res9 = call spir_func <16 x float> @_Z35intel_round_as_tensor_float3216_float16Dv16_t(<16 x float> zeroinitializer)
+ ret void
+}
+
+declare spir_func float @_Z31intel_round_as_tensor_float32_floatt(float)
+declare spir_func <2 x float> @_Z33intel_round_as_tensor_float322__float2Dv2_t(<2 x float>)
+declare spir_func <3 x float> @_Z33intel_round_as_tensor_float323_float3Dv3_t(<3 x float>)
+declare spir_func <4 x float> @_Z33intel_round_as_tensor_float324_float4Dv4_t(<4 x float>)
+declare spir_func <8 x float> @_Z33intel_round_as_tensor_float328_float8Dv8_t(<8 x float>)
+declare spir_func <16 x float> @_Z35intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>)
\ No newline at end of file
>From 13fda308b021a641c22a93fc160af63a0e310a09 Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Tue, 29 Jul 2025 13:24:03 -0700
Subject: [PATCH 13/14] fix bug in test and in feature implementation
---
llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp | 3 +-
.../tf32-conv.ll | 66 +++++++++++++------
2 files changed, 49 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
index e167770db1b2e..03ca2ad1d8fa5 100644
--- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp
@@ -231,6 +231,7 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall,
// - "__spirv_SubgroupImageMediaBlockReadINTEL"
// - "__spirv_SubgroupImageMediaBlockWriteINTEL"
// - "__spirv_Convert"
+ // - "__spirv_Round"
// - "__spirv_UConvert"
// - "__spirv_SConvert"
// - "__spirv_FConvert"
@@ -243,7 +244,7 @@ std::string lookupBuiltinNameHelper(StringRef DemangledCall,
"SDotKHR|SUDotKHR|SDotAccSatKHR|UDotAccSatKHR|SUDotAccSatKHR|"
"ReadClockKHR|SubgroupBlockReadINTEL|SubgroupImageBlockReadINTEL|"
"SubgroupImageMediaBlockReadINTEL|SubgroupImageMediaBlockWriteINTEL|"
- "Convert|"
+ "Convert|Round|"
"UConvert|SConvert|FConvert|SatConvert)[^_]*)(_R[^_]*_?(\\w+)?.*)?");
std::smatch Match;
if (std::regex_match(BuiltinName, Match, SpvWithR) && Match.size() > 1) {
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
index ff85f744aaf89..bf4e98dc5d79d 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -1,34 +1,62 @@
-; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - %.spvasm
+; RUN: llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o - -filetype=obj | spirv-val %}
-; CHECK: OpCapability BFloat16ConversionINTEL
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; CHECK-ERROR: the builtin requires the following SPIR-V extension: SPV_INTEL_tensor_float32_conversion
+
+; CHECK: OpCapability TensorFloat32RoundingINTEL
+; CHECK: OpExtension "SPV_INTEL_tensor_float32_conversion"
+
+; CHECK-DAG: %[[VoidTy:.*]] = OpTypeVoid
+; CHECK-DAG: %[[FP32Ty:.*]] = OpTypeFloat 32
+; CHECK-DAG: %[[VecFloat2:.*]] = OpTypeVector %[[FP32Ty]] 2
+; CHECK-DAG: %[[VecFloat3:.*]] = OpTypeVector %[[FP32Ty]] 3
+; CHECK-DAG: %[[VecFloat4:.*]] = OpTypeVector %[[FP32Ty]] 4
+; CHECK-DAG: %[[VecFloat8:.*]] = OpTypeVector %[[FP32Ty]] 8
+; CHECK-DAG: %[[VecFloat16:.*]] = OpTypeVector %[[FP32Ty]] 16
+; CHECK-DAG: %[[FloatConstId:.*]] = OpConstant %[[FP32Ty]] 1.5
+
+; CHECK: OpFunction %[[VoidTy]]
+; CHECK: %[[FP32ValId:.*]] = OpFunctionParameter %[[FP32Ty]]
+; CHECK: %[[FP32v8ValId:.*]] = OpFunctionParameter %[[VecFloat8]]
+; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] %[[FP32ValId]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat8]] %[[FP32v8ValId]]
+; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]] %[[FloatConstId]]
+
+; CHECK: OpRoundFToTF32INTEL %[[FP32Ty]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat2]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat3]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat4]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat8]]
+; CHECK: OpRoundFToTF32INTEL %[[VecFloat16]]
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
define spir_func void @test(float %a, <8 x float> %in) {
- %res1 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float %a)
- %res2 = tail call spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
- %res3 = tail call spir_func float @_Z27__spirv_RoundFToTF32INTELf(float 1.500000e+00)
+ %res1 = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELf(float %a)
+ %res2 = tail call spir_func <8 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ %res3 = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELf(float 1.500000e+00)
ret void
}
-declare spir_func float @_Z27__spirv_RoundFToTF32INTELf(float)
-declare spir_func <8 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
+declare spir_func float @_Z25__spirv_RoundFToTF32INTELf(float)
+declare spir_func <8 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
define dso_local spir_kernel void @test_ocl(float %a) {
entry:
- %res4 = call spir_func float @_Z31intel_round_as_tensor_float32_floatt(float 0.000000e+00)
- %res5 = call spir_func <2 x float> @_Z33intel_round_as_tensor_float322__float2Dv2_t(<2 x float> zeroinitializer)
- %res6 = call spir_func <3 x float> @_Z33intel_round_as_tensor_float323_float3Dv3_t(<3 x float> zeroinitializer)
- %res7 = call spir_func <4 x float> @_Z33intel_round_as_tensor_float324_float4Dv4_t(<4 x float> zeroinitializer)
- %res8 = call spir_func <8 x float> @_Z33intel_round_as_tensor_float328_float8Dv8_t(<8 x float> zeroinitializer)
- %res9 = call spir_func <16 x float> @_Z35intel_round_as_tensor_float3216_float16Dv16_t(<16 x float> zeroinitializer)
+ %res4 = call spir_func float @_Z35intel_round_as_tensor_float32_floatt(float 0.000000e+00)
+ %res5 = call spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2 x float> zeroinitializer)
+ %res6 = call spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float> zeroinitializer)
+ %res7 = call spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float> zeroinitializer)
+ %res8 = call spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float> zeroinitializer)
+ %res9 = call spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float> zeroinitializer)
ret void
}
-declare spir_func float @_Z31intel_round_as_tensor_float32_floatt(float)
-declare spir_func <2 x float> @_Z33intel_round_as_tensor_float322__float2Dv2_t(<2 x float>)
-declare spir_func <3 x float> @_Z33intel_round_as_tensor_float323_float3Dv3_t(<3 x float>)
-declare spir_func <4 x float> @_Z33intel_round_as_tensor_float324_float4Dv4_t(<4 x float>)
-declare spir_func <8 x float> @_Z33intel_round_as_tensor_float328_float8Dv8_t(<8 x float>)
-declare spir_func <16 x float> @_Z35intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>)
\ No newline at end of file
+declare spir_func float @_Z35intel_round_as_tensor_float32_floatt(float)
+declare spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2 x float>)
+declare spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float>)
+declare spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float>)
+declare spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float>)
+declare spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>)
\ No newline at end of file
>From 1a08e04d17164d7df8b6ba67eb4b5b15f6e7f40c Mon Sep 17 00:00:00 2001
From: "Zhang, Yixing" <yixing.zhang at intel.com>
Date: Tue, 29 Jul 2025 13:56:07 -0700
Subject: [PATCH 14/14] fix all the tests
---
.../tf32-conv-negative1.ll | 6 +++---
.../tf32-conv-negative2.ll | 6 +++---
.../SPV_INTEL_tensor_float32_conversion/tf32-conv.ll | 2 +-
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
index 92843abf1f67a..fa708ab022a85 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative1.ll
@@ -1,12 +1,12 @@
-; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
; CHECK-ERROR: result and argument must have the same number of components
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
define spir_func void @test(<8 x float> %in) {
- %res = tail call spir_func i16 @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ %res = tail call spir_func float @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
ret void
}
-declare spir_func float @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
+declare spir_func float @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
index dc6cd0a9c1896..630b2fdd7696c 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv-negative2.ll
@@ -1,12 +1,12 @@
-; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-extensions=SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
+; RUN: not llc -O0 -mtriple=spirv32-unknown-unknown --spirv-ext=+SPV_INTEL_tensor_float32_conversion %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR
; CHECK-ERROR: result and argument must have the same number of components
target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64"
target triple = "spir64-unknown-unknown"
define spir_func void @test(<8 x float> %in) {
- %res = tail call spir_func <4 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
+ %res = tail call spir_func <4 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float> %in)
ret void
}
-declare spir_func <4 x float> @_Z27__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
\ No newline at end of file
+declare spir_func <4 x float> @_Z25__spirv_RoundFToTF32INTELDv8_f(<8 x float>)
diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
index bf4e98dc5d79d..dcad78d17bff7 100644
--- a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
+++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_tensor_float32_conversion/tf32-conv.ll
@@ -59,4 +59,4 @@ declare spir_func <2 x float> @_Z37intel_round_as_tensor_float322_float2Dv2_t(<2
declare spir_func <3 x float> @_Z37intel_round_as_tensor_float323_float3Dv3_t(<3 x float>)
declare spir_func <4 x float> @_Z37intel_round_as_tensor_float324_float4Dv4_t(<4 x float>)
declare spir_func <8 x float> @_Z37intel_round_as_tensor_float328_float8Dv8_t(<8 x float>)
-declare spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>)
\ No newline at end of file
+declare spir_func <16 x float> @_Z39intel_round_as_tensor_float3216_float16Dv16_t(<16 x float>)
More information about the Mlir-commits
mailing list