[clang] [X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics (PR #120927)

Phoebe Wang via cfe-commits cfe-commits at lists.llvm.org
Sun Dec 22 17:10:08 PST 2024


https://github.com/phoebewang updated https://github.com/llvm/llvm-project/pull/120927

>From 2b9cde28c2093556d006b5ff0103137f52a7a751 Mon Sep 17 00:00:00 2001
From: "Wang, Phoebe" <phoebe.wang at intel.com>
Date: Mon, 23 Dec 2024 09:05:06 +0800
Subject: [PATCH] [X86][AVX10.2] Fix wrong mask bits in cvtpbf8_ph intrinsics

Found during review #120766
---
 clang/lib/Headers/avx10_2_512convertintrin.h         | 4 ++--
 clang/lib/Headers/avx10_2convertintrin.h             | 4 ++--
 clang/test/CodeGen/X86/avx10_2_512convert-builtins.c | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
index a34e135fa30473..60a5b1ef4548d8 100644
--- a/clang/lib/Headers/avx10_2_512convertintrin.h
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -308,13 +308,13 @@ static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
 }
 
 static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
+_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask32 __U, __m256i __A) {
   return _mm512_castsi512_ph(
       _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
 }
 
 static __inline __m512h __DEFAULT_FN_ATTRS512
-_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
+_mm512_maskz_cvtpbf8_ph(__mmask32 __U, __m256i __A) {
   return _mm512_castsi512_ph(
       _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
 }
diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h
index 134adb2850c8de..efe8477cbbf9be 100644
--- a/clang/lib/Headers/avx10_2convertintrin.h
+++ b/clang/lib/Headers/avx10_2convertintrin.h
@@ -580,13 +580,13 @@ static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtpbf8_ph(__m128i __A) {
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask8 __U, __m128i __A) {
+_mm256_mask_cvtpbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
   return _mm256_castsi256_ph(
       _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
 }
 
 static __inline__ __m256h __DEFAULT_FN_ATTRS256
-_mm256_maskz_cvtpbf8_ph(__mmask8 __U, __m128i __A) {
+_mm256_maskz_cvtpbf8_ph(__mmask16 __U, __m128i __A) {
   return _mm256_castsi256_ph(
       _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
 }
diff --git a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
index e71cc0c9ad6b02..6662e0cbf8a913 100644
--- a/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
+++ b/clang/test/CodeGen/X86/avx10_2_512convert-builtins.c
@@ -299,7 +299,7 @@ __m512h test_mm512_cvtpbf8_ph(__m256i A) {
   return _mm512_cvtpbf8_ph(A);
 }
 
-__m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask16 M, __m256i A) {
+__m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask32 M, __m256i A) {
   // CHECK-LABEL: @test_mm512_mask_cvtpbf8_ph
   // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
   // CHECK: @llvm.x86.avx512.pslli.w.512
@@ -308,7 +308,7 @@ __m512h test_mm512_mask_cvtpbf8_ph(__m512h S, __mmask16 M, __m256i A) {
   return _mm512_mask_cvtpbf8_ph(S, M, A);
 }
 
-__m512h test_mm512_maskz_cvtpbf8_ph(__mmask16 M, __m256i A) {
+__m512h test_mm512_maskz_cvtpbf8_ph(__mmask32 M, __m256i A) {
   // CHECK-LABEL: @test_mm512_maskz_cvtpbf8_ph
   // CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
   // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}



More information about the cfe-commits mailing list