[clang] eb9dc0c - [X86] add 3 missing intrinsics: _mm_(mask/maskz)_cvtpbh_ps
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 17 16:48:37 PST 2021
Author: Freddy Ye
Date: 2021-11-18T08:48:19+08:00
New Revision: eb9dc0c78f97984ed1ef08feb33422e2c7463cac
URL: https://github.com/llvm/llvm-project/commit/eb9dc0c78f97984ed1ef08feb33422e2c7463cac
DIFF: https://github.com/llvm/llvm-project/commit/eb9dc0c78f97984ed1ef08feb33422e2c7463cac.diff
LOG: [X86] add 3 missing intrinsics: _mm_(mask/maskz)_cvtpbh_ps
Reviewed By: craig.topper, pengfei
Differential Revision: https://reviews.llvm.org/D114059
Added:
Modified:
clang/lib/Headers/avx512bf16intrin.h
clang/lib/Headers/avx512vlbf16intrin.h
clang/test/CodeGen/X86/avx512vlbf16-builtins.c
Removed:
################################################################################
diff --git a/clang/lib/Headers/avx512bf16intrin.h b/clang/lib/Headers/avx512bf16intrin.h
index d1d87e72f147..09653738d40a 100644
--- a/clang/lib/Headers/avx512bf16intrin.h
+++ b/clang/lib/Headers/avx512bf16intrin.h
@@ -232,7 +232,7 @@ _mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) {
///
/// \param __A
/// A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
(__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16));
@@ -247,7 +247,7 @@ static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) {
/// bit is not set.
/// \param __A
/// A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32(
@@ -265,7 +265,7 @@ _mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) {
/// A 16-bit mask.
/// \param __A
/// A 256-bit vector of [16 x bfloat].
-/// \returns A 512-bit vector of [16 x float] come from convertion of __A
+/// \returns A 512-bit vector of [16 x float] come from conversion of __A
static __inline__ __m512 __DEFAULT_FN_ATTRS512
_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) {
return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32(
diff --git a/clang/lib/Headers/avx512vlbf16intrin.h b/clang/lib/Headers/avx512vlbf16intrin.h
index 1b1a744bcdbf..6a5a86071f0b 100644
--- a/clang/lib/Headers/avx512vlbf16intrin.h
+++ b/clang/lib/Headers/avx512vlbf16intrin.h
@@ -420,18 +420,46 @@ static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
return __R[0];
}
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __A
+/// A 128-bit vector of [8 x bfloat]; only the lower 4 elements are converted.
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtpbh_ps(__m128bh __A) {
+ return _mm_castsi128_ps(
+ (__m128i)_mm_slli_epi32((__m128i)_mm_cvtepi16_epi32((__m128i)__A), 16));
+}
+
/// Convert Packed BF16 Data to Packed float Data.
///
/// \headerfile <x86intrin.h>
///
/// \param __A
/// A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
(__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
}
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __U
+/// A 4-bit mask. Elements are zeroed out when the corresponding mask
+/// bit is not set.
+/// \param __A
+/// A 128-bit vector of [8 x bfloat]; only the lower 4 elements are converted.
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+ return _mm_castsi128_ps((__m128i)_mm_slli_epi32(
+ (__m128i)_mm_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
///
/// \headerfile <x86intrin.h>
@@ -441,13 +469,33 @@ static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
/// bit is not set.
/// \param __A
/// A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
(__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
}
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile <x86intrin.h>
+///
+/// \param __S
+/// A 128-bit vector of [4 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+/// A 4-bit mask. Elements are copied from __S when the corresponding mask
+/// bit is not set.
+/// \param __A
+/// A 128-bit vector of [8 x bfloat]; only the lower 4 elements are converted.
+/// \returns A 128-bit vector of [4 x float] come from conversion of __A
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
+_mm_mask_cvtpbh_ps(__m128 __S, __mmask8 __U, __m128bh __A) {
+ return _mm_castsi128_ps((__m128i)_mm_mask_slli_epi32(
+ (__m128i)__S, (__mmask8)__U, (__m128i)_mm_cvtepi16_epi32((__m128i)__A),
+ 16));
+}
+
/// Convert Packed BF16 Data to Packed float Data using merging mask.
///
/// \headerfile <x86intrin.h>
@@ -460,7 +508,7 @@ _mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
/// bit is not set.
/// \param __A
/// A 128-bit vector of [8 x bfloat].
-/// \returns A 256-bit vector of [8 x float] come from convertion of __A
+/// \returns A 256-bit vector of [8 x float] come from conversion of __A
static __inline__ __m256 __DEFAULT_FN_ATTRS256
_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
diff --git a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
index e30c4a344711..36dec9b40c7f 100644
--- a/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbf16-builtins.c
@@ -169,6 +169,15 @@ __bfloat16 test_mm_cvtness_sbh(float A) {
return _mm_cvtness_sbh(A);
}
+__m128 test_mm_cvtpbh_ps(__m128bh A) {
+ // CHECK-LABEL: @test_mm_cvtpbh_ps
+ // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+ // CHECK: @llvm.x86.sse2.pslli.d
+ // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+ // CHECK: ret <4 x float> %{{.*}}
+ return _mm_cvtpbh_ps(A);
+}
+
__m256 test_mm256_cvtpbh_ps(__m128bh A) {
// CHECK-LABEL: @test_mm256_cvtpbh_ps
// CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -178,6 +187,16 @@ __m256 test_mm256_cvtpbh_ps(__m128bh A) {
return _mm256_cvtpbh_ps(A);
}
+__m128 test_mm_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
+ // CHECK-LABEL: @test_mm_maskz_cvtpbh_ps
+ // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+ // CHECK: @llvm.x86.sse2.pslli.d
+ // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+ // CHECK: ret <4 x float> %{{.*}}
+ return _mm_maskz_cvtpbh_ps(M, A);
+}
+
__m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
// CHECK-LABEL: @test_mm256_maskz_cvtpbh_ps
// CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
@@ -188,6 +207,16 @@ __m256 test_mm256_maskz_cvtpbh_ps(__mmask8 M, __m128bh A) {
return _mm256_maskz_cvtpbh_ps(M, A);
}
+__m128 test_mm_mask_cvtpbh_ps(__m128 S, __mmask8 M, __m128bh A) {
+ // CHECK-LABEL: @test_mm_mask_cvtpbh_ps
+ // CHECK: sext <4 x i16> %{{.*}} to <4 x i32>
+ // CHECK: @llvm.x86.sse2.pslli.d
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+ // CHECK: bitcast <2 x i64> %{{.*}} to <4 x float>
+ // CHECK: ret <4 x float> %{{.*}}
+ return _mm_mask_cvtpbh_ps(S, M, A);
+}
+
__m256 test_mm256_mask_cvtpbh_ps(__m256 S, __mmask8 M, __m128bh A) {
// CHECK-LABEL: @test_mm256_mask_cvtpbh_ps
// CHECK: sext <8 x i16> %{{.*}} to <8 x i32>
More information about the cfe-commits
mailing list