[clang] [X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics (PR #120927)
Phoebe Wang via cfe-commits
cfe-commits at lists.llvm.org
Sun Dec 22 17:07:46 PST 2024
https://github.com/phoebewang created https://github.com/llvm/llvm-project/pull/120927
Found during review #120766
From 13176bdf116f54aee7ffed4b22cace39776c6515 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Dec 2024 09:05:06 +0800
Subject: [PATCH] [X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics
Found during review #120766
---
clang/lib/Headers/avx10_2_512convertintrin.h | 4 ++--
clang/lib/Headers/avx10_2convertintrin.h | 4 ++--
clang/test/CodeGen/X86/avx10_2_512convert-builtins.c | 4 ++--
clang/test/CodeGen/X86/avx10_2convert-builtins.c | 4 ++--
4 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index a34e135fa30473..60a5b1ef4548d8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -308,13 +308,13 @@ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
}
static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
+_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {
return _mm512_castsi512_ph(
_mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
}
static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
+_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) {
return _mm512_castsi512_ph(
_mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
}
diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h
index 134adb2850c8de..efe8477cbbf9be 100644
--- a/clang/lib/Headers/avx10_2convertintrin.h
+++ b/clang/lib/Headers/avx10_2convertintrin.h
@@ -580,13 +580,13 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {
}
static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {
+_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
return _mm256_castsi256_ph(
_mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
}
static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
+_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) {
return _mm256_castsi256_ph(
_mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
}
diff --git a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
index e71cc0c9ad6b02..6662e0cbf8a913 100644
--- a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
@@ -299,7 +299,7 @@ __m512h test_mm512_cvtpbf8_ph(__m256i A) {
return _mm512_cvtpbf8_ph(A);
}
-__m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask16 M, __m256i A) {
+__m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask32 M, __m256i A) {
// CHECK-LABEL: @test_mm512_mask_cvtpbf8_ph
// CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
// CHECK: @llvm.x86.avx512.pslli.w.512
@@ -308,7 +308,7 @@ __m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask16 M, __m256i A) {
return _mm512_mask_cvtpbf8_ph(S, M, A);
}
-__m512h test_mm512_maskz_cvtpbf8_ph(__mmask16 M, __m256i A) {
+__m512h test_mm512_maskz_cvtpbf8_ph(__mmask32 M, __m256i A) {
// CHECK-LABEL: @test_mm512_maskz_cvtpbf8_ph
// CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
diff --git a/clang/test/CodeGen/X86/avx10_2convert-builtins.c b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
index 8086c1b5d33993..7121b28719c077 100644
--- a/clang/test/CodeGen/X86/avx10_2convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2convert-builtins.c
@@ -593,7 +593,7 @@ __m128h test_mm_cvtpbf8_ph(__m128i A) {
return _mm_cvtpbf8_ph(A);
}
-__m128h test_mm_mask_cvtpbf8_ph(__m128h S, __mmask8 M, __m128i A) {
+__m128h test_mm_mask_cvtpbf8_ph(__m128h S, __mmask16 M, __m128i A) {
// CHECK-LABEL: @test_mm_mask_cvtpbf8_ph
// CHECK: sext <8 x i8> %{{.*}} to <8 x i16>
// CHECK: @llvm.x86.sse2.pslli.w
@@ -602,7 +602,7 @@ __m128h test_mm_mask_cvtpbf8_ph(__m128h S, __mmask8 M, __m128i A) {
return _mm_mask_cvtpbf8_ph(S, M, A);
}
-__m128h test_mm_maskz_cvtpbf8_ph(__mmask8 M, __m128i A) {
+__m128h test_mm_maskz_cvtpbf8_ph(__mmask16 M, __m128i A) {
// CHECK-LABEL: @test_mm_maskz_cvtpbf8_ph
// CHECK: sext <8 x i8> %{{.*}} to <8 x i16>
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
More information about the cfe-commits mailing list