[Mlir-commits] [mlir] Revert "[mlir][x86vector] AVX Convert/Broadcast BF16 to F32 instructions" (PR #136781)
Jan Patrick Lehr
llvmlistbot at llvm.org
Tue Apr 22 15:44:23 PDT 2025
https://github.com/jplehr created https://github.com/llvm/llvm-project/pull/136781
Reverts llvm/llvm-project#135143
This broke multiple build bots; see the PR for details.
From a3d764184b1edf23e43090c2fd6dcc833a802a05 Mon Sep 17 00:00:00 2001
From: Jan Patrick Lehr <JanPatrick.Lehr at amd.com>
Date: Wed, 23 Apr 2025 00:43:51 +0200
Subject: [PATCH] Revert "[mlir][x86vector] AVX Convert/Broadcast BF16 to F32 instructions (#13…"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This reverts commit 89a792e4e3f0464d071e71db0fd14d01e4aff971.
---
.../mlir/Dialect/X86Vector/X86Vector.td | 123 +-----------------
.../mlir/Dialect/X86Vector/X86VectorDialect.h | 2 -
.../Dialect/X86Vector/X86VectorInterfaces.td | 2 +-
.../Dialect/X86Vector/IR/X86VectorDialect.cpp | 44 +------
.../Transforms/LegalizeForLLVMExport.cpp | 11 +-
.../Dialect/X86Vector/legalize-for-llvm.mlir | 54 --------
mlir/test/Dialect/X86Vector/roundtrip.mlir | 60 ---------
mlir/test/Target/LLVMIR/x86vector.mlir | 56 +-------
8 files changed, 12 insertions(+), 340 deletions(-)
diff --git a/mlir/include/mlir/Dialect/X86Vector/X86Vector.td b/mlir/include/mlir/Dialect/X86Vector/X86Vector.td
index 126fa0e352656..5be0d92db4630 100644
--- a/mlir/include/mlir/Dialect/X86Vector/X86Vector.td
+++ b/mlir/include/mlir/Dialect/X86Vector/X86Vector.td
@@ -83,7 +83,7 @@ def MaskCompressOp : AVX512_Op<"mask.compress", [Pure,
}
}];
let extraClassDeclaration = [{
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&);
}];
}
@@ -404,127 +404,8 @@ def DotOp : AVX_LowOp<"dot", [Pure,
}
}];
let extraClassDeclaration = [{
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
+ SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&);
}];
}
-
-//----------------------------------------------------------------------------//
-// AVX: Convert packed BF16 even-indexed/odd-indexed elements into packed F32
-//----------------------------------------------------------------------------//
-
-def CvtPackedEvenIndexedBF16ToF32Op : AVX_Op<"cvt.packed.even.indexed.bf16_to_f32", [MemoryEffects<[MemRead]>,
- DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
- let summary = "AVX: Convert packed BF16 even-indexed elements into packed F32 Data.";
- let description = [{
- #### From the Intel Intrinsics Guide:
-
- Convert packed BF16 (16-bit) floating-point even-indexed elements stored at
- memory locations starting at location `__A` to packed single-precision
- (32-bit) floating-point elements, and store the results in `dst`.
-
- Example:
- ```mlir
- %dst = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- ```
- }];
- let arguments = (ins AnyMemRef:$a);
- let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
- let assemblyFormat =
- "$a attr-dict`:` type($a)`->` type($dst)";
-
- let extraClassDefinition = [{
- std::string $cppClass::getIntrinsicName() {
- std::string intr = "llvm.x86.vcvtneebf162ps";
- VectorType vecType = getDst().getType();
- unsigned elemBitWidth = vecType.getElementTypeBitWidth();
- unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
- intr += std::to_string(opBitWidth);
- return intr;
- }
- }];
-
- let extraClassDeclaration = [{
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
- }];
-}
-
-def CvtPackedOddIndexedBF16ToF32Op : AVX_Op<"cvt.packed.odd.indexed.bf16_to_f32", [MemoryEffects<[MemRead]>,
- DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
- let summary = "AVX: Convert packed BF16 odd-indexed elements into packed F32 Data.";
- let description = [{
- #### From the Intel Intrinsics Guide:
-
- Convert packed BF16 (16-bit) floating-point odd-indexed elements stored at
- memory locations starting at location `__A` to packed single-precision
- (32-bit) floating-point elements, and store the results in `dst`.
-
- Example:
- ```mlir
- %dst = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- ```
- }];
- let arguments = (ins AnyMemRef:$a);
- let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
- let assemblyFormat =
- "$a attr-dict`:` type($a)`->` type($dst)";
-
- let extraClassDefinition = [{
- std::string $cppClass::getIntrinsicName() {
- std::string intr = "llvm.x86.vcvtneobf162ps";
- VectorType vecType = getDst().getType();
- unsigned elemBitWidth = vecType.getElementTypeBitWidth();
- unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
- intr += std::to_string(opBitWidth);
- return intr;
- }
- }];
-
- let extraClassDeclaration = [{
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
- }];
-}
-
-//----------------------------------------------------------------------------//
-// AVX: Convert BF16 to F32 and broadcast into packed F32
-//----------------------------------------------------------------------------//
-
-def BcstBF16ToPackedF32Op : AVX_Op<"bcst.bf16_to_f32.packed", [MemoryEffects<[MemRead]>,
- DeclareOpInterfaceMethods<OneToOneIntrinsicOpInterface>]> {
- let summary = "AVX: Broadcasts BF16 into packed F32 Data.";
- let description = [{
- #### From the Intel Intrinsics Guide:
-
- Convert scalar BF16 (16-bit) floating-point element stored at memory locations
- starting at location `__A` to a single-precision (32-bit) floating-point,
- broadcast it to packed single-precision (32-bit) floating-point elements,
- and store the results in `dst`.
-
- Example:
- ```mlir
- %dst = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<8xf32>
- ```
- }];
- let arguments = (ins AnyMemRef:$a);
- let results = (outs VectorOfLengthAndType<[4, 8], [F32]>:$dst);
- let assemblyFormat =
- "$a attr-dict`:` type($a)`->` type($dst)";
-
- let extraClassDefinition = [{
- std::string $cppClass::getIntrinsicName() {
- std::string intr = "llvm.x86.vbcstnebf162ps";
- VectorType vecType = getDst().getType();
- unsigned elemBitWidth = vecType.getElementTypeBitWidth();
- unsigned opBitWidth = vecType.getShape()[0] * elemBitWidth;
- intr += std::to_string(opBitWidth);
- return intr;
- }
- }];
-
- let extraClassDeclaration = [{
- SmallVector<Value> getIntrinsicOperands(::mlir::RewriterBase&, const LLVMTypeConverter&);
- }];
-
-}
-
#endif // X86VECTOR_OPS
diff --git a/mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h b/mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h
index 308adfa5b9021..7bcf4c69b0a6c 100644
--- a/mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h
+++ b/mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h
@@ -14,8 +14,6 @@
#define MLIR_DIALECT_X86VECTOR_X86VECTORDIALECT_H_
#include "mlir/Bytecode/BytecodeOpInterface.h"
-#include "mlir/Conversion/LLVMCommon/Pattern.h"
-#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/IR/Dialect.h"
#include "mlir/IR/OpDefinition.h"
diff --git a/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td b/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td
index 5176f4a447b6e..98d5ca70b4a7d 100644
--- a/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td
+++ b/mlir/include/mlir/Dialect/X86Vector/X86VectorInterfaces.td
@@ -58,7 +58,7 @@ def OneToOneIntrinsicOpInterface : OpInterface<"OneToOneIntrinsicOp"> {
}],
/*retType=*/"SmallVector<Value>",
/*methodName=*/"getIntrinsicOperands",
- /*args=*/(ins "::mlir::RewriterBase &":$rewriter, "const LLVMTypeConverter &":$typeConverter),
+ /*args=*/(ins "::mlir::RewriterBase &":$rewriter),
/*methodBody=*/"",
/*defaultImplementation=*/"return SmallVector<Value>($_op->getOperands());"
>,
diff --git a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp
index f5e5070c74f8f..5bb4dcfd60d83 100644
--- a/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp
+++ b/mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp
@@ -31,26 +31,6 @@ void x86vector::X86VectorDialect::initialize() {
>();
}
-static SmallVector<Value>
-getMemrefBuffPtr(Location loc, ::mlir::TypedValue<::mlir::MemRefType> memrefVal,
- RewriterBase &rewriter,
- const LLVMTypeConverter &typeConverter) {
- SmallVector<Value> operands;
- auto opType = memrefVal.getType();
-
- Type llvmStructType = typeConverter.convertType(opType);
- Value llvmStruct =
- rewriter
- .create<UnrealizedConversionCastOp>(loc, llvmStructType, memrefVal)
- .getResult(0);
- MemRefDescriptor memRefDescriptor(llvmStruct);
-
- Value ptr = memRefDescriptor.bufferPtr(rewriter, loc, typeConverter, opType);
- operands.push_back(ptr);
-
- return operands;
-}
-
LogicalResult x86vector::MaskCompressOp::verify() {
if (getSrc() && getConstantSrc())
return emitError("cannot use both src and constant_src");
@@ -65,8 +45,8 @@ LogicalResult x86vector::MaskCompressOp::verify() {
return success();
}
-SmallVector<Value> x86vector::MaskCompressOp::getIntrinsicOperands(
- RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
+SmallVector<Value>
+x86vector::MaskCompressOp::getIntrinsicOperands(RewriterBase &rewriter) {
auto loc = getLoc();
auto opType = getA().getType();
@@ -84,8 +64,7 @@ SmallVector<Value> x86vector::MaskCompressOp::getIntrinsicOperands(
}
SmallVector<Value>
-x86vector::DotOp::getIntrinsicOperands(RewriterBase &rewriter,
- const LLVMTypeConverter &typeConverter) {
+x86vector::DotOp::getIntrinsicOperands(RewriterBase &rewriter) {
SmallVector<Value> operands(getOperands());
// Dot product of all elements, broadcasted to all elements.
Value scale =
@@ -95,22 +74,5 @@ x86vector::DotOp::getIntrinsicOperands(RewriterBase &rewriter,
return operands;
}
-SmallVector<Value> x86vector::BcstBF16ToPackedF32Op::getIntrinsicOperands(
- RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
- return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter);
-}
-
-SmallVector<Value>
-x86vector::CvtPackedOddIndexedBF16ToF32Op::getIntrinsicOperands(
- RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
- return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter);
-}
-
-SmallVector<Value>
-x86vector::CvtPackedEvenIndexedBF16ToF32Op::getIntrinsicOperands(
- RewriterBase &rewriter, const LLVMTypeConverter &typeConverter) {
- return getMemrefBuffPtr(getLoc(), getA(), rewriter, typeConverter);
-}
-
#define GET_OP_CLASSES
#include "mlir/Dialect/X86Vector/X86Vector.cpp.inc"
diff --git a/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp b/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
index d2297554a1012..c0c7f61f55f88 100644
--- a/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
+++ b/mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp
@@ -96,8 +96,8 @@ struct OneToOneIntrinsicOpConversion
LogicalResult matchAndRewrite(x86vector::OneToOneIntrinsicOp op,
PatternRewriter &rewriter) const override {
return intrinsicRewrite(op, rewriter.getStringAttr(op.getIntrinsicName()),
- op.getIntrinsicOperands(rewriter, typeConverter),
- typeConverter, rewriter);
+ op.getIntrinsicOperands(rewriter), typeConverter,
+ rewriter);
}
private:
@@ -114,8 +114,7 @@ void mlir::populateX86VectorLegalizeForLLVMExportPatterns(
void mlir::configureX86VectorLegalizeForExportTarget(
LLVMConversionTarget &target) {
- target.addIllegalOp<
- MaskCompressOp, MaskRndScaleOp, MaskScaleFOp, Vp2IntersectOp, DotBF16Op,
- CvtPackedF32ToBF16Op, CvtPackedEvenIndexedBF16ToF32Op,
- CvtPackedOddIndexedBF16ToF32Op, BcstBF16ToPackedF32Op, RsqrtOp, DotOp>();
+ target.addIllegalOp<MaskCompressOp, MaskRndScaleOp, MaskScaleFOp,
+ Vp2IntersectOp, DotBF16Op, CvtPackedF32ToBF16Op, RsqrtOp,
+ DotOp>();
}
diff --git a/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir b/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir
index 93b304c44de8e..df0be7bce83be 100644
--- a/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir
+++ b/mlir/test/Dialect/X86Vector/legalize-for-llvm.mlir
@@ -95,60 +95,6 @@ func.func @avx512bf16_cvt_packed_f32_to_bf16_512(
return %0 : vector<16xbf16>
}
-// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128
-func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128(
- %a: memref<8xbf16>) -> vector<4xf32>
-{
- // CHECK: llvm.call_intrinsic "llvm.x86.vcvtneebf162ps128"
- %0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<8xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256
-func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256(
- %a: memref<16xbf16>) -> vector<8xf32>
-{
- // CHECK: llvm.call_intrinsic "llvm.x86.vcvtneebf162ps256"
- %0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128
-func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128(
- %a: memref<8xbf16>) -> vector<4xf32>
-{
- // CHECK: llvm.call_intrinsic "llvm.x86.vcvtneobf162ps128"
- %0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<8xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256
-func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256(
- %a: memref<16xbf16>) -> vector<8xf32>
-{
- // CHECK: llvm.call_intrinsic "llvm.x86.vcvtneobf162ps256"
- %0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_bsct_bf16_to_f32_packed_128
-func.func @avxbf16_bsct_bf16_to_f32_packed_128(
- %a: memref<1xbf16>) -> vector<4xf32>
-{
- // CHECK: llvm.call_intrinsic "llvm.x86.vbcstnebf162ps128"
- %0 = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_bsct_bf16_to_f32_packed_256
-func.func @avxbf16_bsct_bf16_to_f32_packed_256(
- %a: memref<1xbf16>) -> vector<8xf32>
-{
- // CHECK: llvm.call_intrinsic "llvm.x86.vbcstnebf162ps256"
- %0 = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
// CHECK-LABEL: func @avx_rsqrt
func.func @avx_rsqrt(%a: vector<8xf32>) -> (vector<8xf32>)
{
diff --git a/mlir/test/Dialect/X86Vector/roundtrip.mlir b/mlir/test/Dialect/X86Vector/roundtrip.mlir
index b783cc869b981..0d00448c63da8 100644
--- a/mlir/test/Dialect/X86Vector/roundtrip.mlir
+++ b/mlir/test/Dialect/X86Vector/roundtrip.mlir
@@ -94,66 +94,6 @@ func.func @avx512bf16_cvt_packed_f32_to_bf16_512(
return %0 : vector<16xbf16>
}
-// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128
-func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_128(
- %a: memref<8xbf16>) -> vector<4xf32>
-{
- // CHECK: x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 {{.*}} :
- // CHECK-SAME: memref<8xbf16> -> vector<4xf32>
- %0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<8xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256
-func.func @avxbf16_cvt_packed_even_indexed_bf16_to_f32_256(
- %a: memref<16xbf16>) -> vector<8xf32>
-{
- // CHECK: x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 {{.*}} :
- // CHECK-SAME: memref<16xbf16> -> vector<8xf32>
- %0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128
-func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_128(
- %a: memref<8xbf16>) -> vector<4xf32>
-{
- // CHECK: x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 {{.*}} :
- // CHECK-SAME: memref<8xbf16> -> vector<4xf32>
- %0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<8xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256
-func.func @avxbf16_cvt_packed_odd_indexed_bf16_to_f32_256(
- %a: memref<16xbf16>) -> vector<8xf32>
-{
- // CHECK: x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 {{.*}} :
- // CHECK-SAME: memref<16xbf16> -> vector<8xf32>
- %0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_bcst_bf16_to_f32_128
-func.func @avxbf16_bcst_bf16_to_f32_128(
- %a: memref<1xbf16>) -> vector<4xf32>
-{
- // CHECK: x86vector.avx.bcst.bf16_to_f32.packed {{.*}} :
- // CHECK-SAME: memref<1xbf16> -> vector<4xf32>
- %0 = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: func @avxbf16_bcst_bf16_to_f32_256
-func.func @avxbf16_bcst_bf16_to_f32_256(
- %a: memref<1xbf16>) -> vector<8xf32>
-{
- // CHECK: x86vector.avx.bcst.bf16_to_f32.packed {{.*}} :
- // CHECK-SAME: memref<1xbf16> -> vector<8xf32>
- %0 = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
// CHECK-LABEL: func @avx_rsqrt
func.func @avx_rsqrt(%a: vector<8xf32>) -> (vector<8xf32>)
{
diff --git a/mlir/test/Target/LLVMIR/x86vector.mlir b/mlir/test/Target/LLVMIR/x86vector.mlir
index a8bc180d1d0ac..85dad36334b1d 100644
--- a/mlir/test/Target/LLVMIR/x86vector.mlir
+++ b/mlir/test/Target/LLVMIR/x86vector.mlir
@@ -1,4 +1,4 @@
-// RUN: mlir-opt %s --convert-vector-to-llvm="enable-x86vector" --convert-to-llvm -reconcile-unrealized-casts \
+// RUN: mlir-opt %s --convert-vector-to-llvm="enable-x86vector" --convert-to-llvm \
// RUN: | mlir-translate --mlir-to-llvmir \
// RUN: | FileCheck %s
@@ -109,60 +109,6 @@ func.func @LLVM_x86_avx512bf16_cvtneps2bf16_512(
return %0 : vector<16xbf16>
}
-// CHECK-LABEL: define <4 x float> @LLVM_x86_avxbf16_vcvtneebf162ps128
-func.func @LLVM_x86_avxbf16_vcvtneebf162ps128(
- %a: memref<8xbf16>) -> vector<4xf32>
-{
- // CHECK: call <4 x float> @llvm.x86.vcvtneebf162ps128(
- %0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<8xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: define <8 x float> @LLVM_x86_avxbf16_vcvtneebf162ps256
-func.func @LLVM_x86_avxbf16_vcvtneebf162ps256(
- %a: memref<16xbf16>) -> vector<8xf32>
-{
- // CHECK: call <8 x float> @llvm.x86.vcvtneebf162ps256(
- %0 = x86vector.avx.cvt.packed.even.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
-// CHECK-LABEL: define <4 x float> @LLVM_x86_avxbf16_vcvtneobf162ps128
-func.func @LLVM_x86_avxbf16_vcvtneobf162ps128(
- %a: memref<8xbf16>) -> vector<4xf32>
-{
- // CHECK: call <4 x float> @llvm.x86.vcvtneobf162ps128(
- %0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<8xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: define <8 x float> @LLVM_x86_avxbf16_vcvtneobf162ps256
-func.func @LLVM_x86_avxbf16_vcvtneobf162ps256(
- %a: memref<16xbf16>) -> vector<8xf32>
-{
- // CHECK: call <8 x float> @llvm.x86.vcvtneobf162ps256(
- %0 = x86vector.avx.cvt.packed.odd.indexed.bf16_to_f32 %a : memref<16xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
-// CHECK-LABEL: define <4 x float> @LLVM_x86_avxbf16_vbcstnebf162ps128
-func.func @LLVM_x86_avxbf16_vbcstnebf162ps128(
- %a: memref<1xbf16>) -> vector<4xf32>
-{
- // CHECK: call <4 x float> @llvm.x86.vbcstnebf162ps128(
- %0 = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<4xf32>
- return %0 : vector<4xf32>
-}
-
-// CHECK-LABEL: define <8 x float> @LLVM_x86_avxbf16_vbcstnebf162ps256
-func.func @LLVM_x86_avxbf16_vbcstnebf162ps256(
- %a: memref<1xbf16>) -> vector<8xf32>
-{
- // CHECK: call <8 x float> @llvm.x86.vbcstnebf162ps256(
- %0 = x86vector.avx.bcst.bf16_to_f32.packed %a : memref<1xbf16> -> vector<8xf32>
- return %0 : vector<8xf32>
-}
-
// CHECK-LABEL: define <8 x float> @LLVM_x86_avx_rsqrt_ps_256
func.func @LLVM_x86_avx_rsqrt_ps_256(%a: vector <8xf32>) -> vector<8xf32>
{
More information about the Mlir-commits
mailing list