[clang] Allow avx512 bw masked intrinsics to be used in constexpr (PR #162871)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Oct 10 08:31:13 PDT 2025
https://github.com/GrumpyPigSkin created https://github.com/llvm/llvm-project/pull/162871
Added the CONSTEXPR macro and tests for the following intrinsics:
-- _mm_mask_adds_epi16 _mm_maskz_adds_epi16
-- _mm_mask_adds_epi8 _mm_maskz_adds_epi8
-- _mm_mask_adds_epu16 _mm_maskz_adds_epu16
-- _mm_mask_adds_epu8 _mm_maskz_adds_epu8
-- _mm_mask_broadcastb_epi8 _mm_maskz_broadcastb_epi8
-- _mm_mask_broadcastw_epi16 _mm_maskz_broadcastw_epi16
-- _mm_mask_cvtepi8_epi16 _mm_maskz_cvtepi8_epi16
-- _mm_mask_cvtepu8_epi16 _mm_maskz_cvtepu8_epi16
-- _mm_mask_packs_epi16 _mm_maskz_packs_epi16
-- _mm_mask_packs_epi32 _mm_maskz_packs_epi32
-- _mm_mask_packus_epi16 _mm_maskz_packus_epi16
-- _mm_mask_packus_epi32 _mm_maskz_packus_epi32
-- _mm_mask_set1_epi16 _mm_maskz_set1_epi16
-- _mm_mask_set1_epi8 _mm_maskz_set1_epi8
-- _mm_mask_slli_epi16 _mm_maskz_slli_epi16
-- _mm_mask_subs_epi16 _mm_maskz_subs_epi16
-- _mm_mask_subs_epi8 _mm_maskz_subs_epi8
-- _mm_mask_subs_epu16 _mm_maskz_subs_epu16
-- _mm_mask_subs_epu8 _mm_maskz_subs_epu8
-- _mm_mask_unpackhi_epi16 _mm_maskz_unpackhi_epi16
-- _mm_mask_unpackhi_epi8 _mm_maskz_unpackhi_epi8
-- _mm_mask_unpacklo_epi16 _mm_maskz_unpacklo_epi16
-- _mm_mask_unpacklo_epi8 _mm_maskz_unpacklo_epi8
-- _mm256_mask_adds_epi16 _mm256_maskz_adds_epi16
-- _mm256_mask_adds_epi8 _mm256_maskz_adds_epi8
-- _mm256_mask_adds_epu16 _mm256_maskz_adds_epu16
-- _mm256_mask_adds_epu8 _mm256_maskz_adds_epu8
-- _mm256_mask_broadcastb_epi8 _mm256_maskz_broadcastb_epi8
-- _mm256_mask_broadcastw_epi16 _mm256_maskz_broadcastw_epi16
-- _mm256_mask_cvtepi8_epi16 _mm256_maskz_cvtepi8_epi16
-- _mm256_mask_cvtepu8_epi16 _mm256_maskz_cvtepu8_epi16
-- _mm256_mask_packs_epi16 _mm256_maskz_packs_epi16
-- _mm256_mask_packs_epi32 _mm256_maskz_packs_epi32
-- _mm256_mask_packus_epi16 _mm256_maskz_packus_epi16
-- _mm256_mask_packus_epi32 _mm256_maskz_packus_epi32
-- _mm256_mask_set1_epi16 _mm256_maskz_set1_epi16
-- _mm256_mask_set1_epi8 _mm256_maskz_set1_epi8
-- _mm256_mask_slli_epi16 _mm256_maskz_slli_epi16
-- _mm256_mask_subs_epi16 _mm256_maskz_subs_epi16
-- _mm256_mask_subs_epi8 _mm256_maskz_subs_epi8
-- _mm256_mask_subs_epu16 _mm256_maskz_subs_epu16
-- _mm256_mask_subs_epu8 _mm256_maskz_subs_epu8
-- _mm256_mask_unpackhi_epi16 _mm256_maskz_unpackhi_epi16
-- _mm256_mask_unpackhi_epi8 _mm256_maskz_unpackhi_epi8
-- _mm256_mask_unpacklo_epi16 _mm256_maskz_unpacklo_epi16
-- _mm256_mask_unpacklo_epi8 _mm256_maskz_unpacklo_epi8
-- _mm512_mask_adds_epi16 _mm512_maskz_adds_epi16
-- _mm512_mask_adds_epi8 _mm512_maskz_adds_epi8
-- _mm512_mask_adds_epu16 _mm512_maskz_adds_epu16
-- _mm512_mask_adds_epu8 _mm512_maskz_adds_epu8
-- _mm512_mask_broadcastb_epi8 _mm512_maskz_broadcastb_epi8
-- _mm512_mask_broadcastw_epi16 _mm512_maskz_broadcastw_epi16
-- _mm512_mask_mov_epi16 _mm512_maskz_mov_epi16
-- _mm512_mask_mov_epi8 _mm512_maskz_mov_epi8
-- _mm512_mask_packs_epi16 _mm512_maskz_packs_epi16
-- _mm512_mask_packs_epi32 _mm512_maskz_packs_epi32
-- _mm512_mask_packus_epi16 _mm512_maskz_packus_epi16
-- _mm512_mask_packus_epi32 _mm512_maskz_packus_epi32
-- _mm512_mask_set1_epi16 _mm512_maskz_set1_epi16
-- _mm512_mask_set1_epi8 _mm512_maskz_set1_epi8
-- _mm512_mask_subs_epi16 _mm512_maskz_subs_epi16
-- _mm512_mask_subs_epi8 _mm512_maskz_subs_epi8
-- _mm512_mask_subs_epu16 _mm512_maskz_subs_epu16
-- _mm512_mask_subs_epu8 _mm512_maskz_subs_epu8
-- _mm512_mask_unpackhi_epi16 _mm512_maskz_unpackhi_epi16
-- _mm512_mask_unpackhi_epi8 _mm512_maskz_unpackhi_epi8
-- _mm512_mask_unpacklo_epi16 _mm512_maskz_unpacklo_epi16
-- _mm512_mask_unpacklo_epi8 _mm512_maskz_unpacklo_epi8
closes #162070
@RKSimon I have left the tests expanded since I thought it would be easier to review them that way.
>From 733bcc2df0e4c50d8b5be1f5a2161aae8c6acf34 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 7 Oct 2025 00:42:49 +0100
Subject: [PATCH 1/6] Interim commit for avx512
---
clang/lib/Headers/avx512bwintrin.h | 78 ++-
clang/test/CodeGen/X86/avx512bw-builtins.c | 621 ++++++++++++++++++++-
2 files changed, 654 insertions(+), 45 deletions(-)
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index c36bd814725fa..04841bba93b1b 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -515,7 +515,7 @@ _mm512_packs_epi32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
@@ -523,9 +523,8 @@ _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
(__v32hi)_mm512_packs_epi32(__A, __B),
(__v32hi)__W);
@@ -536,7 +535,7 @@ _mm512_packs_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -544,7 +543,7 @@ _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -599,17 +598,15 @@ _mm512_adds_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_adds_epi8(__A, __B),
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_adds_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
@@ -620,7 +617,7 @@ _mm512_adds_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -628,7 +625,7 @@ _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -641,7 +638,7 @@ _mm512_adds_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -649,7 +646,7 @@ _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -662,17 +659,15 @@ _mm512_adds_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_adds_epu16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_adds_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
@@ -1580,7 +1575,7 @@ _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) {
((__m512i)__builtin_ia32_psrldqi512_byteshift((__v64qi)(__m512i)(a), \
(int)(imm)))
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
@@ -1588,23 +1583,21 @@ _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
(__v32hi) __W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_mov_epi16(__mmask32 __U, __m512i __A) {
return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
(__v32hi) __A,
(__v32hi) _mm512_setzero_si512 ());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_mask_mov_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
(__v64qi) __A,
(__v64qi) __W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
@@ -1612,7 +1605,7 @@ _mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A)
(__v64qi) _mm512_setzero_si512 ());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
{
return (__m512i) __builtin_ia32_selectb_512(__M,
@@ -1620,9 +1613,8 @@ _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A)
(__v64qi) __O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
return (__m512i) __builtin_ia32_selectb_512(__M,
(__v64qi) _mm512_set1_epi8(__A),
(__v64qi) _mm512_setzero_si512());
@@ -1815,7 +1807,7 @@ _mm512_broadcastb_epi8(__m128i __A) {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectb_512(__M,
@@ -1823,15 +1815,14 @@ _mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A)
(__v64qi) __O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
return (__m512i)__builtin_ia32_selectb_512(__M,
(__v64qi) _mm512_broadcastb_epi8(__A),
(__v64qi) _mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
{
return (__m512i) __builtin_ia32_selectw_512(__M,
@@ -1839,9 +1830,8 @@ _mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A)
(__v32hi) __O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
-_mm512_maskz_set1_epi16 (__mmask32 __M, short __A)
-{
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
+_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
return (__m512i) __builtin_ia32_selectw_512(__M,
(__v32hi) _mm512_set1_epi16(__A),
(__v32hi) _mm512_setzero_si512());
@@ -1854,7 +1844,7 @@ _mm512_broadcastw_epi16(__m128i __A) {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectw_512(__M,
@@ -1862,7 +1852,7 @@ _mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A)
(__v32hi) __O);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A)
{
return (__m512i)__builtin_ia32_selectw_512(__M,
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index bd19363c8d948..ef7b681a0a5e2 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -9,6 +9,7 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion -fexperimental-new-constant-interpreter | FileCheck %s
+#include <emmintrin.h>
#include <immintrin.h>
#include "builtin_test_helpers.h"
@@ -1065,12 +1066,74 @@ __m512i test_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_packs_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_maskz_packs_epi32(
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v16si){
+ 50000, 50000, 50000, 50000,
+ 50000, 50000, 50000, 50000,
+ 50000, 50000, 50000, 50000,
+ 50000, 50000, 50000, 50000
+ },
+ (__m512i)(__v16si){
+ -50000, -50000, -50000, -50000,
+ -50000, -50000, -50000, -50000,
+ -50000, -50000, -50000, -50000,
+ -50000, -50000, -50000, -50000
+ }
+ ),
+ 0, 32767, 0, 32767,
+ 0, -32768, 0, -32768,
+ 0, 32767, 0, 32767,
+ 0, -32768, 0, -32768,
+ 0, 32767, 0, 32767,
+ 0, -32768, 0, -32768,
+ 0, 32767, 0, 32767,
+ 0, -32768, 0, -32768
+ )
+);
+
__m512i test_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_packs_epi32
// CHECK: @llvm.x86.avx512.packssdw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_packs_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_mask_packs_epi32(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v16si){
+ 50000, 50000, 50000, 50000,
+ 50000, 50000, 50000, 50000,
+ 50000, 50000, 50000, 50000,
+ 50000, 50000, 50000, 50000
+ },
+ (__m512i)(__v16si){
+ -50000, -50000, -50000, -50000,
+ -50000, -50000, -50000, -50000,
+ -50000, -50000, -50000, -50000,
+ -50000, -50000, -50000, -50000
+ }
+ ),
+ 1, 32767, 3, 32767,
+ 5, -32768, 7, -32768,
+ 9, 32767, 11, 32767,
+ 13, -32768, 15, -32768,
+ 17, 32767, 19, 32767,
+ 21, -32768, 23, -32768,
+ 25, 32767, 27, 32767,
+ 29, -32768, 31, -32768
+ )
+);
+
__m512i test_mm512_packs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packs_epi16
// CHECK: @llvm.x86.avx512.packsswb.512
@@ -1083,12 +1146,94 @@ __m512i test_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_packs_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_mask_packs_epi16(
+ (__m512i)(__v64qs){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__m512i)(__v32hi){
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767
+ },
+ (__m512i)(__v32hi){
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767
+ }
+ ),
+ 1, 127, 3, 127, 5, 127, 7, 127,
+ 9, -128, 11, -128, 13, -128, 15, -128,
+ 17, 127, 19, 127, 21, 127, 23, 127,
+ 25, -128, 27, -128, 29, -128, 31, -128,
+ 33, 127, 35, 127, 37, 127, 39, 127,
+ 41, -128, 43, -128, 45, -128, 47, -128,
+ 49, 127, 51, 127, 53, 127, 55, 127,
+ 57, -128, 59, -128, 61, -128, 63, -128
+ )
+);
+
__m512i test_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_packs_epi16
// CHECK: @llvm.x86.avx512.packsswb.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_packs_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_maskz_packs_epi16(
+ (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__m512i)(__v32hi){
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767
+ },
+ (__m512i)(__v32hi){
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767
+ }
+ ),
+ 0, 127, 0, 127, 0, 127, 0, 127,
+ 0, -128, 0, -128, 0, -128, 0, -128,
+ 0, 127, 0, 127, 0, 127, 0, 127,
+ 0, -128, 0, -128, 0, -128, 0, -128,
+ 0, 127, 0, 127, 0, 127, 0, 127,
+ 0, -128, 0, -128, 0, -128, 0, -128,
+ 0, 127, 0, 127, 0, 127, 0, 127,
+ 0, -128, 0, -128, 0, -128, 0, -128
+ )
+);
+
__m512i test_mm512_packus_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packus_epi32
// CHECK: @llvm.x86.avx512.packusdw.512
@@ -1138,12 +1283,96 @@ __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_adds_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_mask_adds_epi8(
+ (__m512i)(__v64qs){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__m512i)(__v64qs){
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64
+ },
+ (__m512i)(__v64qs){
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64
+ }
+ ),
+ 1, 126, 3, 0, 5, 127, 7, 0,
+ 9, 126, 11, 0, 13, 127, 15, 0,
+ 17, 126, 19, 0, 21, 127, 23, 0,
+ 25, 126, 27, 0, 29, 127, 31, 0,
+ 33, 126, 35, 0, 37, 127, 39, 0,
+ 41, 126, 43, 0, 45, 127, 47, 0,
+ 49, 126, 51, 0, 53, 127, 55, 0,
+ 57, 126, 59, 0, 61, 127, 63, 0
+ )
+);
+
+
+
__m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epi8
// CHECK: @llvm.sadd.sat.v64i8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_adds_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_maskz_adds_epi8(
+ (__mmask64)0xFFFFFFFFFFFFFFFFu,
+ (__m512i)(__v64qs){
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64,
+ 127, 63, 1, 0, 1, 64, -127, 64
+ },
+ (__m512i)(__v64qs){
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64,
+ 127, 63, 1, 0, -127, 64, -127, -64
+ }
+ ),
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0,
+ 127, 126, 2, 0, -126, 127, -128, 0
+ )
+);
+
__m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epi16
// CHECK: @llvm.sadd.sat.v32i16
@@ -1157,12 +1386,70 @@ __m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_adds_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_mask_adds_epi16(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hi){
+ 30000, 100, -20000, 15000, -32768, 16384, 20000, -100,
+ 30000, 100, -20000, 15000, -32768, 16384, 20000, -100,
+ 30000, 100, -20000, 15000, -32768, 16384, 20000, -100,
+ 30000, 100, -20000, 15000, -32768, 16384, 20000, -100
+ },
+ (__m512i)(__v32hi){
+ 30000, 200, -20000, 20000, -1, 20000, 20000, 50,
+ 30000, 200, -20000, 20000, -1, 20000, 20000, 50,
+ 30000, 200, -20000, 20000, -1, 20000, 20000, 50,
+ 30000, 200, -20000, 20000, -1, 20000, 20000, 50
+ }
+ ),
+ 1, 300, 3, 32767, 5, 32767, 7, -50,
+ 9, 300, 11, 32767, 13, 32767, 15, -50,
+ 17, 300, 19, 32767, 21, 32767, 23, -50,
+ 25, 300, 27, 32767, 29, 32767, 31, -50
+
+ )
+);
+
+
__m512i test_mm512_maskz_adds_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epi16
// CHECK: @llvm.sadd.sat.v32i16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_adds_epi16(__U,__A,__B);
}
+
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_maskz_adds_epi16(
+ (__mmask32)0xFFFFFFFFu,
+ (__m512i)(__v32hi){
+ 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384,
+ 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384,
+ 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384,
+ 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384
+ },
+ (__m512i)(__v32hi){
+ 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384,
+ 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384,
+ 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384,
+ 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384
+ }
+ ),
+
+ 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767,
+ 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767,
+ 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767,
+ 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767
+ )
+);
+
__m512i test_mm512_adds_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epu8
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.b.512
@@ -1178,7 +1465,51 @@ __m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_adds_epu8(__W,__U,__A,__B);
}
-TEST_CONSTEXPR(match_v32hu(_mm512_adds_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, +32767, +32768, +65535, +16384, +32768, +49151, +49152, +65535, +65535, +32767, +49151, +65534, +65535, +65535, +65535, +32768, +49152, +65535, +65535, +65535, +65535, +49152, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535));
+TEST_CONSTEXPR(
+ match_v64qu(
+ _mm512_mask_adds_epu8(
+ (__m512i)(__v64qu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__m512i)(__v64qu){
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255
+ },
+ (__m512i)(__v64qu){
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ }
+ ),
+ 1, 201, 3, 150, 5, 200, 7, 255,
+ 9, 201, 11, 150, 13, 200, 15, 255,
+ 17, 201, 19, 150, 21, 200, 23, 255,
+ 25, 201, 27, 150, 29, 200, 31, 255,
+ 33, 201, 35, 150, 37, 200, 39, 255,
+ 41, 201, 43, 150, 45, 200, 47, 255,
+ 49, 201, 51, 150, 53, 200, 55, 255,
+ 57, 201, 59, 150, 61, 200, 63, 255
+ )
+);
__m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epu8
@@ -1187,12 +1518,60 @@ __m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_adds_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qu(
+ _mm512_mask_adds_epu8(
+ (__m512i)(__v64qu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xFFFFFFFFFFFFFFFFu,
+ (__m512i)(__v64qu){
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255,
+ 0, 1, 10, 50, 100, 150, 200, 255
+ },
+ (__m512i)(__v64qu){
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ 255, 200, 150, 100, 80, 50, 20, 10,
+ }
+ ),
+ 255, 201, 160, 150, 180, 200,220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255,
+ 255, 201, 160, 150, 180, 200, 220, 255
+ )
+);
+
__m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
// CHECK: call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_adds_epu16(__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_adds_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, +32767, +32768, +65535, +16384, +32768, +49151, +49152, +65535, +65535, +32767, +49151, +65534, +65535, +65535, +65535, +32768, +49152, +65535, +65535, +65535, +65535, +49152, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535, +65535));
+
__m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_adds_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
@@ -1200,6 +1579,37 @@ __m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_adds_epu16(__W,__U,__A,__B);
}
+
+TEST_CONSTEXPR(
+ match_v32hu(
+ _mm512_mask_adds_epu16(
+ (__m512i)(__v32hu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hu){
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100
+ },
+ (__m512i)(__v32hu){
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50
+ }
+ ),
+ 1, 300, 3, 65535, 5, 65535, 7, 150,
+ 9, 300, 11, 65535, 13, 65535, 15, 150,
+ 17, 300, 19, 65535, 21, 65535, 23, 150,
+ 25, 300, 27, 65535, 29, 65535, 31, 150
+ )
+);
+
__m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.paddus.w.512
@@ -1207,6 +1617,31 @@ __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_adds_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hu(
+ _mm512_maskz_adds_epu16(
+ (__mmask32)0xFFFFFFFFu,
+ (__m512i)(__v32hu){
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
+ 30000, 100, 20000, 45000, 65534, 60000, 20000, 100
+ },
+ (__m512i)(__v32hu){
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
+ 30000, 200, 20000, 30000, 1, 20000, 20000, 50
+ }
+ ),
+ 60000, 300, 40000, 65535, 65535, 65535, 40000, 150,
+ 60000, 300, 40000, 65535, 65535, 65535, 40000, 150,
+ 60000, 300, 40000, 65535, 65535, 65535, 40000, 150,
+ 60000, 300, 40000, 65535, 65535, 65535, 40000, 150
+ )
+);
+
+
__m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_avg_epu8
// CHECK: @llvm.x86.avx512.pavg.b.512
@@ -2202,24 +2637,124 @@ __m512i test_mm512_mask_mov_epi16(__m512i __W, __mmask32 __U, __m512i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mov_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_mask_mov_epi16(
+ (__m512i)(__v32hi){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hi){
+ -0, -1, -2, -3, -4, -5, -6, -7,
+ -8, -9, -10, -11, -12, -13, -14, -15,
+ -16, -17, -18, -19, -20, -21, -22, -23,
+ -24, -25, -26, -27, -28, -29, -30, -31
+ }
+ ),
+ 0, -1, 2, -3, 4, -5, 6, -7,
+ 8, -9, 10, -11, 12, -13, 14, -15,
+ 16, -17, 18, -19, 20, -21, 22, -23,
+ 24, -25, 26, -27, 28, -29, 30, -31
+ )
+);
__m512i test_mm512_maskz_mov_epi16(__mmask32 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_mov_epi16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mov_epi16(__U, __A);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_maskz_mov_epi16(
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hi){
+ -0, -1, -2, -3, -4, -5, -6, -7,
+ -8, -9, -10, -11, -12, -13, -14, -15,
+ -16, -17, -18, -19, -20, -21, -22, -23,
+ -24, -25, -26, -27, -28, -29, -30, -31
+ }
+ ),
+ 0, -1, 0, -3, 0, -5, 0, -7,
+ 0, -9, 0, -11, 0, -13, 0, -15,
+ 0, -17, 0, -19, 0, -21, 0, -23,
+ 0, -25, 0, -27, 0, -29, 0, -31
+ )
+);
__m512i test_mm512_mask_mov_epi8(__m512i __W, __mmask64 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_mask_mov_epi8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_mov_epi8(__W, __U, __A);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_mask_mov_epi8(
+ (__m512i)(__v64qs){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__m512i)(__v64qs){
+ -0, -1, -2, -3, -4, -5, -6, -7,
+ -8, -9, -10, -11, -12, -13, -14, -15,
+ -16, -17, -18, -19, -20, -21, -22, -23,
+ -24, -25, -26, -27, -28, -29, -30, -31,
+ -32, -33, -34, -35, -36, -37, -38, -39,
+ -40, -41, -42, -43, -44, -45, -46, -47,
+ -48, -49, -50, -51, -52, -53, -54, -55,
+ -56, -57, -58, -59, -60, -61, -62, -63
+ }
+ ),
+ 0, -1, 2, -3, 4, -5, 6, -7,
+ 8, -9, 10, -11, 12, -13, 14, -15,
+ 16, -17, 18, -19, 20, -21, 22, -23,
+ 24, -25, 26, -27, 28, -29, 30, -31,
+ 32, -33, 34, -35, 36, -37, 38, -39,
+ 40, -41, 42, -43, 44, -45, 46, -47,
+ 48, -49, 50, -51, 52, -53, 54, -55,
+ 56, -57, 58, -59, 60, -61, 62, -63
+ )
+);
__m512i test_mm512_maskz_mov_epi8(__mmask64 __U, __m512i __A) {
// CHECK-LABEL: test_mm512_maskz_mov_epi8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_mov_epi8(__U, __A);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_maskz_mov_epi8(
+ (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__m512i)(__v64qs){
+ -0, -1, -2, -3, -4, -5, -6, -7,
+ -8, -9, -10, -11, -12, -13, -14, -15,
+ -16, -17, -18, -19, -20, -21, -22, -23,
+ -24, -25, -26, -27, -28, -29, -30, -31,
+ -32, -33, -34, -35, -36, -37, -38, -39,
+ -40, -41, -42, -43, -44, -45, -46, -47,
+ -48, -49, -50, -51, -52, -53, -54, -55,
+ -56, -57, -58, -59, -60, -61, -62, -63
+ }
+ ),
+ 0, -1, 0, -3, 0, -5, 0, -7,
+ 0, -9, 0, -11, 0, -13, 0, -15,
+ 0, -17, 0, -19, 0, -21, 0, -23,
+ 0, -25, 0, -27, 0, -29, 0, -31,
+ 0, -33, 0, -35, 0, -37, 0, -39,
+ 0, -41, 0, -43, 0, -45, 0, -47,
+ 0, -49, 0, -51, 0, -53, 0, -55,
+ 0, -57, 0, -59, 0, -61, 0, -63
+ )
+);
__m512i test_mm512_mask_set1_epi8(__m512i __O, __mmask64 __M, char __A) {
// CHECK-LABEL: test_mm512_mask_set1_epi8
@@ -2533,6 +3068,36 @@ __m512i test_mm512_mask_broadcastb_epi8(__m512i __O, __mmask64 __M, __m128i __A)
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_broadcastb_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_mask_broadcastb_epi8(
+ (__m512i)(__v64qs){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__m128i)(__v16qs){
+ -120, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }
+ ),
+ 0, -120, 2, -120, 4, -120, 6, -120,
+ 8, -120, 10, -120, 12, -120, 14, -120,
+ 16, -120, 18, -120, 20, -120, 22, -120,
+ 24, -120, 26, -120, 28, -120, 30, -120,
+ 32, -120, 34, -120, 36, -120, 38, -120,
+ 40, -120, 42, -120, 44, -120, 46, -120,
+ 48, -120, 50, -120, 52, -120, 54, -120,
+ 56, -120, 58, -120, 60, -120, 62, -120
+ )
+);
+
__m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
// CHECK-LABEL: test_mm512_maskz_broadcastb_epi8
@@ -2541,6 +3106,26 @@ __m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
return _mm512_maskz_broadcastb_epi8(__M, __A);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_maskz_broadcastb_epi8(
+ (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__m128i)(__v16qs){
+ -120, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }
+ ),
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120
+ )
+);
+
__m512i test_mm512_broadcastw_epi16(__m128i __A) {
// CHECK-LABEL: test_mm512_broadcastw_epi16
// CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <32 x i32> zeroinitializer
@@ -2554,6 +3139,26 @@ __m512i test_mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_broadcastw_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_mask_broadcastw_epi16(
+ (__m512i)(__v32hi){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m128i)(__v8hi){
+ -120, 1, 2, 3, 4, 5, 6, 7
+ }
+ ),
+ 0, -120, 2, -120, 4, -120, 6, -120,
+ 8, -120, 10, -120, 12, -120, 14, -120,
+ 16, -120, 18, -120, 20, -120, 22, -120,
+ 24, -120, 26, -120, 28, -120, 30, -120
+ )
+);
__m512i test_mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm512_maskz_broadcastw_epi16
@@ -2561,6 +3166,20 @@ __m512i test_mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_broadcastw_epi16(__M, __A);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_maskz_broadcastw_epi16(
+ (__mmask32)0xAAAAAAAAu,
+ (__m128i)(__v8hi){
+ -120, 1, 2, 3, 4, 5, 6, 7
+ }
+ ),
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120,
+ 0, -120, 0, -120, 0, -120, 0, -120
+ )
+);
__m512i test_mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A) {
// CHECK-LABEL: test_mm512_mask_set1_epi16
>From 3bb77a9e3e6a6201f74fc75ba55fe465eef111d6 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Tue, 7 Oct 2025 21:22:12 +0100
Subject: [PATCH 2/6] [X86] Finished making AVX512 mask function constexpr
---
clang/lib/Headers/avx512bwintrin.h | 40 +-
clang/test/CodeGen/X86/avx512bw-builtins.c | 725 ++++++++++++++++++++-
2 files changed, 736 insertions(+), 29 deletions(-)
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 04841bba93b1b..3aa40bb61fe18 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -556,7 +556,7 @@ _mm512_packus_epi32(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
@@ -564,7 +564,7 @@ _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
(__v32hi)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
@@ -577,7 +577,7 @@ _mm512_packus_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -585,7 +585,7 @@ _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
@@ -888,7 +888,7 @@ _mm512_subs_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -896,7 +896,7 @@ _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -909,7 +909,7 @@ _mm512_subs_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -917,7 +917,7 @@ _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -930,7 +930,7 @@ _mm512_subs_epu8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -938,7 +938,7 @@ _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
@@ -951,7 +951,7 @@ _mm512_subs_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -959,7 +959,7 @@ _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
@@ -1200,14 +1200,14 @@ _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
62, 64+62, 63, 64+63);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_unpackhi_epi8(__A, __B),
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_unpackhi_epi8(__A, __B),
@@ -1227,14 +1227,14 @@ _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
30, 32+30, 31, 32+31);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_unpackhi_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_unpackhi_epi16(__A, __B),
@@ -1262,14 +1262,14 @@ _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
54, 64+54, 55, 64+55);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_unpacklo_epi8(__A, __B),
(__v64qi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
(__v64qi)_mm512_unpacklo_epi8(__A, __B),
@@ -1289,14 +1289,14 @@ _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
26, 32+26, 27, 32+27);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_unpacklo_epi16(__A, __B),
(__v32hi)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS512
+static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_unpacklo_epi16(__A, __B),
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index ef7b681a0a5e2..e8e8698a60e6e 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1159,7 +1159,7 @@ TEST_CONSTEXPR(
49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v32hi){
32767, 32767, 32767, 32767,
32767, 32767, 32767, 32767,
@@ -1201,7 +1201,7 @@ __m512i test_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
TEST_CONSTEXPR(
match_v64qi(
_mm512_maskz_packs_epi16(
- (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v32hi){
32767, 32767, 32767, 32767,
32767, 32767, 32767, 32767,
@@ -1246,12 +1246,75 @@ __m512i test_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_packus_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hu(
+ _mm512_maskz_packus_epi32(
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v16si){
+ 70000, 70000, 70000, 70000,
+ 70000, 70000, 70000, 70000,
+ 70000, 70000, 70000, 70000,
+ 70000, 70000, 70000, 70000
+ },
+ (__m512i)(__v16si){
+ -70000, -70000, -70000, -70000,
+ -70000, -70000, -70000, -70000,
+ -70000, -70000, -70000, -70000,
+ -70000, -70000, -70000, -70000
+ }
+ ),
+ 0, 65535, 0, 65535,
+ 0, 0, 0, 0,
+ 0, 65535, 0, 65535,
+ 0, 0, 0, 0,
+ 0, 65535, 0, 65535,
+ 0, 0, 0, 0,
+ 0, 65535, 0, 65535,
+ 0, 0, 0, 0
+ )
+);
+
+
__m512i test_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_packus_epi32
// CHECK: @llvm.x86.avx512.packusdw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_packus_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hu(
+ _mm512_mask_packus_epi32(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v16si){
+ 70000, 70000, 70000, 70000,
+ 70000, 70000, 70000, 70000,
+ 70000, 70000, 70000, 70000,
+ 70000, 70000, 70000, 70000
+ },
+ (__m512i)(__v16si){
+ -70000, -70000, -70000, -70000,
+ -70000, -70000, -70000, -70000,
+ -70000, -70000, -70000, -70000,
+ -70000, -70000, -70000, -70000
+ }
+ ),
+ 1, 65535, 3, 65535,
+ 5, 0, 7, 0,
+ 9, 65535, 11, 65535,
+ 13, 0, 15, 0,
+ 17, 65535, 19, 65535,
+ 21, 0, 23, 0,
+ 25, 65535, 27, 65535,
+ 29, 0, 31, 0
+ )
+);
+
__m512i test_mm512_packus_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packus_epi16
// CHECK: @llvm.x86.avx512.packuswb.512
@@ -1264,12 +1327,96 @@ __m512i test_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_packus_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qu(
+ _mm512_mask_packus_epi16(
+ (__m512i)(__v64qu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__m512i)(__v32hi){
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767
+ },
+ (__m512i)(__v32hi){
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767
+ }
+ ),
+ 1, 255, 3,255, 5,255, 7, 255,
+ 9, 0, 11, 0, 13, 0, 15, 0,
+ 17, 255, 19,255, 21,255, 23, 255,
+ 25, 0, 27, 0, 29, 0, 31, 0,
+ 33, 255, 35,255, 37,255, 39, 255,
+ 41, 0, 43, 0, 45, 0, 47, 0,
+ 49, 255, 51,255, 53,255, 55, 255,
+ 57, 0, 59, 0, 61, 0, 63, 0
+ )
+);
+
+
__m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_packus_epi16
// CHECK: @llvm.x86.avx512.packuswb.512
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_packus_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qu(
+ _mm512_maskz_packus_epi16(
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__m512i)(__v32hi){
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767,
+ 32767, 32767, 32767, 32767
+ },
+ (__m512i)(__v32hi){
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767,
+ -32767, -32767, -32767, -32767
+ }
+ ),
+ 0, 255, 0,255, 0,255, 0, 255,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 255, 0,255, 0,255, 0, 255,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 255, 0,255, 0,255, 0, 255,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 255, 0,255, 0,255, 0, 255,
+ 0, 0, 0, 0, 0, 0, 0, 0
+ )
+);
+
+
__m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epi8
// CHECK: @llvm.sadd.sat.v64i8
@@ -1296,7 +1443,7 @@ TEST_CONSTEXPR(
49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
127, 63, 1, 0, 1, 64, -127, 64,
127, 63, 1, 0, 1, 64, -127, 64,
@@ -1478,7 +1625,7 @@ TEST_CONSTEXPR(
49, 50, 51, 52, 53, 54, 55, 56,
57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask64)0xAAAAAAAAAAAAAAAAu,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qu){
0, 1, 10, 50, 100, 150, 200, 255,
0, 1, 10, 50, 100, 150, 200, 255,
@@ -1926,12 +2073,95 @@ __m512i test_mm512_mask_subs_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_subs_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_mask_subs_epi8(
+ (__m512i)(__v64qs){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__m512i)(__v64qs){
+ 1, -100, 3, 4, 5, 6, 7, 8,
+ 9, -100, 11, 12, 13, 14, 15, 16,
+ 17, -100, 19, 20, 21, 22, 23, 24,
+ 25, -100, 27, 28, 29, 30, 31, 32,
+ 33, -100, 35, 36, 37, 38, 39, 40,
+ 41, -100, 43, 44, 45, 46, 47, 48,
+ 49, -100, 51, 52, 53, 54, 55, 56,
+ 57, -100, 59, 60, 61, 62, 63, 64
+ },
+ (__m512i)(__v64qs){
+ 1, 100, 3, 4, 5, 6, 7, 8,
+ 9, 100, 11, 12, 13, 14, 15, 16,
+ 17, 100, 19, 20, 21, 22, 23, 24,
+ 25, 100, 27, 28, 29, 30, 31, 32,
+ 33, 100, 35, 36, 37, 38, 39, 40,
+ 41, 100, 43, 44, 45, 46, 47, 48,
+ 49, 100, 51, 52, 53, 54, 55, 56,
+ 57, 100, 59, 60, 61, 62, 63, 64
+ }
+ ),
+ 1, -128, 3, 0, 5, 0, 7, 0,
+ 9, -128, 11, 0, 13, 0, 15, 0,
+ 17, -128, 19, 0, 21, 0, 23, 0,
+ 25, -128, 27, 0, 29, 0, 31, 0,
+ 33, -128, 35, 0, 37, 0, 39, 0,
+ 41, -128, 43, 0, 45, 0, 47, 0,
+ 49, -128, 51, 0, 53, 0, 55, 0,
+ 57, -128, 59, 0, 61, 0, 63, 0
+ )
+);
+
__m512i test_mm512_maskz_subs_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epi8
// CHECK: @llvm.ssub.sat.v64i8
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_subs_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_maskz_subs_epi8(
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__m512i)(__v64qs){
+ 1, -100, 3, 4, 5, 6, 7, 8,
+ 9, -100, 11, 12, 13, 14, 15, 16,
+ 17, -100, 19, 20, 21, 22, 23, 24,
+ 25, -100, 27, 28, 29, 30, 31, 32,
+ 33, -100, 35, 36, 37, 38, 39, 40,
+ 41, -100, 43, 44, 45, 46, 47, 48,
+ 49, -100, 51, 52, 53, 54, 55, 56,
+ 57, -100, 59, 60, 61, 62, 63, 64
+ },
+ (__m512i)(__v64qs){
+ 1, 100, 3, 4, 5, 6, 7, 8,
+ 9, 100, 11, 12, 13, 14, 15, 16,
+ 17, 100, 19, 20, 21, 22, 23, 24,
+ 25, 100, 27, 28, 29, 30, 31, 32,
+ 33, 100, 35, 36, 37, 38, 39, 40,
+ 41, 100, 43, 44, 45, 46, 47, 48,
+ 49, 100, 51, 52, 53, 54, 55, 56,
+ 57, 100, 59, 60, 61, 62, 63, 64
+ }
+ ),
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0,
+ 0, -128, 0, 0, 0, 0, 0, 0
+ )
+);
+
+
__m512i test_mm512_subs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epi16
// CHECK: @llvm.ssub.sat.v32i16
@@ -1944,12 +2174,66 @@ __m512i test_mm512_mask_subs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_subs_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_mask_subs_epi16(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hi){
+ 1, -30000, 3, 4, 5, 6, 7, 8,
+ 9, -30000, 11, 12, 13, 14, 15, 16,
+ 17, -30000, 19, 20, 21, 22, 23, 24,
+ 25, -30000, 27, 28, 29, 30, 31, 32
+ },
+ (__m512i)(__v32hi){
+ 1, 30000, 3, 4, 5, 6, 7, 8,
+ 9, 30000, 11, 12, 13, 14, 15, 16,
+ 17, 30000, 19, 20, 21, 22, 23, 24,
+ 25, 30000, 27, 28, 29, 30, 31, 32
+ }
+ ),
+ 1, -32768, 3, 0, 5, 0, 7, 0,
+ 9, -32768, 11, 0, 13, 0, 15, 0,
+ 17, -32768, 19, 0, 21, 0, 23, 0,
+ 25, -32768, 27, 0, 29, 0, 31, 0
+ )
+);
+
__m512i test_mm512_maskz_subs_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epi16
// CHECK: @llvm.ssub.sat.v32i16
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_subs_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_maskz_subs_epi16(
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hi){
+ 1, -30000, 3, 4, 5, 6, 7, 8,
+ 9, -30000, 11, 12, 13, 14, 15, 16,
+ 17, -30000, 19, 20, 21, 22, 23, 24,
+ 25, -30000, 27, 28, 29, 30, 31, 32
+ },
+ (__m512i)(__v32hi){
+ 1, 30000, 3, 4, 5, 6, 7, 8,
+ 9, 30000, 11, 12, 13, 14, 15, 16,
+ 17, 30000, 19, 20, 21, 22, 23, 24,
+ 25, 30000, 27, 28, 29, 30, 31, 32
+ }
+ ),
+ 0, -32768, 0, 0, 0, 0, 0, 0,
+ 0, -32768, 0, 0, 0, 0, 0, 0,
+ 0, -32768, 0, 0, 0, 0, 0, 0,
+ 0, -32768, 0, 0, 0, 0, 0, 0
+ )
+);
+
__m512i test_mm512_subs_epu8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epu8
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.b.512
@@ -1963,6 +2247,52 @@ __m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_subs_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qi(
+ _mm512_mask_subs_epu8(
+ (__m512i)(__v64qu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__m512i)(__v64qu){
+ 1, 100, 3, 4, 5, 6, 7, 8,
+ 9, 100, 11, 12, 13, 14, 15, 16,
+ 17, 100, 19, 20, 21, 22, 23, 24,
+ 25, 100, 27, 28, 29, 30, 31, 32,
+ 33, 100, 35, 36, 37, 38, 39, 40,
+ 41, 100, 43, 44, 45, 46, 47, 48,
+ 49, 100, 51, 52, 53, 54, 55, 56,
+ 57, 100, 59, 60, 61, 62, 63, 64
+ },
+ (__m512i)(__v64qu){
+ 1, 200, 3, 4, 5, 6, 7, 8,
+ 9, 200, 11, 12, 13, 14, 15, 16,
+ 17, 200, 19, 20, 21, 22, 23, 24,
+ 25, 200, 27, 28, 29, 30, 31, 32,
+ 33, 200, 35, 36, 37, 38, 39, 40,
+ 41, 200, 43, 44, 45, 46, 47, 48,
+ 49, 200, 51, 52, 53, 54, 55, 56,
+ 57, 200, 59, 60, 61, 62, 63, 64
+ }
+ ),
+ 1, 0, 3, 0, 5, 0, 7, 0,
+ 9, 0, 11, 0, 13, 0, 15, 0,
+ 17, 0, 19, 0, 21, 0, 23, 0,
+ 25, 0, 27, 0, 29, 0, 31, 0,
+ 33, 0, 35, 0, 37, 0, 39, 0,
+ 41, 0, 43, 0, 45, 0, 47, 0,
+ 49, 0, 51, 0, 53, 0, 55, 0,
+ 57, 0, 59, 0, 61, 0, 63, 0
+ )
+);
+
TEST_CONSTEXPR(match_v64qu(_mm512_subs_epu8((__m512i)(__v64qu){0, 0, 0, 0, 0, 0, 0, 0, +63, +63, +63, +63, +63, +63, +63, +63, +64, +64, +64, +64, +64, +64, +64, +64, +127, +127, +127, +127, +127, +127, +127, +127, +128, +128, +128, +128, +128, +128, +128, +128, +191, +191, +191, +191, +191, +191, +191, +191, +192, +192, +192, +192, +192, +192, +192, +192, +255, +255, +255, +255, +255, +255, +255, +255}, (__m512i)(__v64qu){0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255, 0, +63, +64, +127, +128, +191, +192, +255}), 0, 0, 0, 0, 0, 0, 0, 0, +63, 0, 0, 0, 0, 0, 0, 0, +64, +1, 0, 0, 0, 0, 0, 0, +127, +64, +63, 0, 0, 0, 0, 0, +128, +65, +64, +1, 0, 0, 0, 0, +191, +128, +127, +64, +63, 0, 0, 0, +192, +129, +128, +65, +64, +1, 0, 0, +255, +192, +191, +128, +127, +64, +63, +0));
__m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
@@ -1972,20 +2302,88 @@ __m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_subs_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v64qu(
+ _mm512_maskz_subs_epu8(
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // odd lanes keep A-B (saturating); even lanes must be zeroed
+ (__m512i)(__v64qu){
+ 1, 100, 3, 4, 5, 6, 7, 8,
+ 9, 100, 11, 12, 13, 14, 15, 16,
+ 17, 100, 19, 20, 21, 22, 23, 24,
+ 25, 100, 27, 28, 29, 30, 31, 32,
+ 33, 100, 35, 36, 37, 38, 39, 40,
+ 41, 100, 43, 44, 45, 46, 47, 48,
+ 49, 100, 51, 52, 53, 54, 55, 56,
+ 57, 100, 59, 60, 61, 62, 63, 64
+ },
+ (__m512i)(__v64qu){
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1,
+ 1, 200, 1, 1, 1, 1, 1, 1
+ }
+ ),
+ 0, 0, 0, 3, 0, 5, 0, 7,
+ 0, 0, 0, 11, 0, 13, 0, 15,
+ 0, 0, 0, 19, 0, 21, 0, 23,
+ 0, 0, 0, 27, 0, 29, 0, 31,
+ 0, 0, 0, 35, 0, 37, 0, 39,
+ 0, 0, 0, 43, 0, 45, 0, 47,
+ 0, 0, 0, 51, 0, 53, 0, 55,
+ 0, 0, 0, 59, 0, 61, 0, 63
+ )
+);
+
+
__m512i test_mm512_subs_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_subs_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
// CHECK: call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
return _mm512_subs_epu16(__A,__B);
}
+TEST_CONSTEXPR(match_v32hu(_mm512_subs_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, 0, 0, 0, +16384, 0, 0, 0, 0, 0, +32767, +16383, 0, 0, 0, 0, +32768, +16384, +1, 0, 0, 0, +49152, +32768, +16385, +16384, 0, 0, +65535, +32768, +32767, 0));
+
__m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_subs_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
// CHECK: call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}})
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_subs_epu16(__W,__U,__A,__B);
-TEST_CONSTEXPR(match_v32hu(_mm512_subs_epu16((__m512i)(__v32hu){0, 0, 0, 0, +16384, +16384, +16384, +16384, +16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767, +32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152, +49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535}, (__m512i)(__v32hu){0, +32767, +32768, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +16384, +32767, +32768, +49152, +65535, 0, +32767, +32768, +65535}), 0, 0, 0, 0, +16384, 0, 0, 0, 0, 0, +32767, +16383, 0, 0, 0, 0, +32768, +16384, +1, 0, 0, 0, +49152, +32768, +16385, +16384, 0, 0, +65535, +32768, +32767, 0));
}
+TEST_CONSTEXPR(
+ match_v32hu(
+ _mm512_mask_subs_epu16(
+ (__m512i)(__v32hu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu,
+ (__m512i)(__v32hu){
+ 1, 30000, 3, 4, 5, 6, 7, 8,
+ 9, 30000, 11, 12, 13, 14, 15, 16,
+ 17, 30000, 19, 20, 21, 22, 23, 24,
+ 25, 30000, 27, 28, 29, 30, 31, 32
+ },
+ (__m512i)(__v32hu){
+ 1, 60000, 3, 4, 5, 6, 7, 8,
+ 9, 60000, 11, 12, 13, 14, 15, 16,
+ 17, 60000, 19, 20, 21, 22, 23, 24,
+ 25, 60000, 27, 28, 29, 30, 31, 32
+ }
+ ),
+ 1, 0, 3, 0, 5, 0, 7, 0,
+ 9, 0, 11, 0, 13, 0, 15, 0,
+ 17, 0, 19, 0, 21, 0, 23, 0,
+ 25, 0, 27, 0, 29, 0, 31, 0
+ )
+);
+
__m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_subs_epu16
// CHECK-NOT: @llvm.x86.avx512.mask.psubus.w.512
@@ -1993,6 +2391,31 @@ __m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_subs_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked unsigned saturating subtract.
+ match_v32hu( // NOTE(review): every expected element is 0, so masked-off and computed elements look the same.
+ _mm512_maskz_subs_epu16(
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ (__m512i)(__v32hu){
+ 1, 30000, 3, 4, 5, 6, 7, 8,
+ 9, 30000, 11, 12, 13, 14, 15, 16,
+ 17, 30000, 19, 20, 21, 22, 23, 24,
+ 25, 30000, 27, 28, 29, 30, 31, 32
+ },
+ (__m512i)(__v32hu){ // never smaller than the minuend above, so each difference is 0
+ 1, 60000, 3, 4, 5, 6, 7, 8,
+ 9, 60000, 11, 12, 13, 14, 15, 16,
+ 17, 60000, 19, 20, 21, 22, 23, 24,
+ 25, 60000, 27, 28, 29, 30, 31, 32
+ }
+ ),
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0
+ )
+);
+
+
__m512i test_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, __m512i __B) {
// CHECK-LABEL: test_mm512_mask2_permutex2var_epi16
// CHECK: @llvm.x86.avx512.vpermi2var.hi.512
@@ -2184,6 +2607,35 @@ __m512i test_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, _
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_unpackhi_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the merge-masked byte unpack-high.
+ match_v64qi( // Expected: even elements from the first operand, odd elements from the last operand.
+ _mm512_mask_unpackhi_epi8(
+ (__m512i)(__v64qs){
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // only odd-numbered mask bits are set
+ (__m512i)(__v64qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ 116, 117, 118, 119, 120, 121, 122, 123, -128, -127, -126, -125, -124, -123, -122, -121,
+ -120,-119,-118,-117,-116,-115,-114,-113, -112, -111, -110, -109, -108, -107, -106, -105,
+ -104,-103,-102,-101,-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89
+ },
+ (__m512i)(__v64qs){ // the second half of each 16-element row shows up in the expected odd elements
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32,
+ -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48,
+ -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64
+ }
+ ),
+ 1, -9, 3,-10, 5,-11, 7,-12, 9,-13, 11,-14, 13,-15, 15,-16,
+ 17,-25, 19,-26, 21,-27, 23,-28, 25,-29, 27,-30, 29,-31, 31,-32,
+ 33,-41, 35,-42, 37,-43, 39,-44, 41,-45, 43,-46, 45,-47, 47,-48,
+ 49,-57, 51,-58, 53,-59, 55,-60, 57,-61, 59,-62, 61,-63, 63,-64
+ )
+);
__m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpackhi_epi8
@@ -2191,6 +2643,29 @@ __m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B)
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_unpackhi_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked byte unpack-high.
+ match_v64qi( // Expected: even elements are 0, odd elements come from the last operand.
+ _mm512_maskz_unpackhi_epi8(
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // only odd-numbered mask bits are set
+ (__m512i)(__v64qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ 116, 117, 118, 119, 120, 121, 122, 123, -128, -127, -126, -125, -124, -123, -122, -121,
+ -120,-119,-118,-117,-116,-115,-114,-113, -112, -111, -110, -109, -108, -107, -106, -105,
+ -104,-103,-102,-101,-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89
+ },
+ (__m512i)(__v64qs){ // the second half of each 16-element row shows up in the expected odd elements
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ -17, -18, -19, -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, -30, -31, -32,
+ -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45, -46, -47, -48,
+ -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64
+ }
+ ),
+ 0, -9, 0,-10, 0,-11, 0,-12, 0,-13, 0,-14, 0,-15, 0,-16,
+ 0,-25, 0,-26, 0,-27, 0,-28, 0,-29, 0,-30, 0,-31, 0,-32,
+ 0,-41, 0,-42, 0,-43, 0,-44, 0,-45, 0,-46, 0,-47, 0,-48,
+ 0,-57, 0,-58, 0,-59, 0,-60, 0,-61, 0,-62, 0,-63, 0,-64
+ )
+);
__m512i test_mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_unpackhi_epi16
@@ -2206,6 +2681,35 @@ __m512i test_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_unpackhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the merge-masked 16-bit unpack-high.
+ match_v32hi( // Expected: even elements from the first operand, odd elements from the last operand.
+ _mm512_mask_unpackhi_epi16(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ (__m512i)(__v32hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 110, 111, 112, 113, 114, 115, 116, 117,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m512i)(__v32hi){ // the last four values of each 8-element row show up in the expected odd elements
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 210, 211, 212, 213, 214, 215, 216, 217,
+ 220, 221, 222, 223, 224, 225, 226, 227,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 1, 204, 3, 205, 5, 206, 7, 207,
+ 9, 214, 11, 215, 13, 216, 15, 217,
+ 17, 224, 19, 225, 21, 226, 23, 227,
+ 25, 234, 27, 235, 29, 236, 31, 237
+ )
+);
__m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpackhi_epi16
@@ -2213,6 +2717,29 @@ __m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_unpackhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked 16-bit unpack-high.
+ match_v32hi( // Expected: even elements are 0, odd elements come from the last operand.
+ _mm512_maskz_unpackhi_epi16(
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ (__m512i)(__v32hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 110, 111, 112, 113, 114, 115, 116, 117,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m512i)(__v32hi){ // the last four values of each 8-element row show up in the expected odd elements
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 210, 211, 212, 213, 214, 215, 216, 217,
+ 220, 221, 222, 223, 224, 225, 226, 227,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 0, 204, 0, 205, 0, 206, 0, 207,
+ 0, 214, 0, 215, 0, 216, 0, 217,
+ 0, 224, 0, 225, 0, 226, 0, 227,
+ 0, 234, 0, 235, 0, 236, 0, 237
+ )
+);
__m512i test_mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_unpacklo_epi8
@@ -2227,6 +2754,35 @@ __m512i test_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, _
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_unpacklo_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the merge-masked byte unpack-low.
+ match_v64qi( // Expected: even elements from the first operand, odd elements from the last operand.
+ _mm512_mask_unpacklo_epi8(
+ (__m512i)(__v64qs){
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // only odd-numbered mask bits are set
+ (__m512i)(__v64qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25,
+ -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45,
+ -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65
+ },
+ (__m512i)(__v64qs){ // the first half of each 16-element row shows up in the expected odd elements
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
+ }
+ ),
+ 1, -1, 3, -2, 5, -3, 7, -4, 9, -5, 11, -6, 13, -7, 15, -8,
+ 17, 20, 19, 21, 21, 22, 23, 23, 25, 24, 27, 25, 29, 26, 31, 27,
+ 33, 40, 35, 41, 37, 42, 39, 43, 41, 44, 43, 45, 45, 46, 47, 47,
+ 49, 60, 51, 61, 53, 62, 55, 63, 57, 64, 59, 65, 61, 66, 63, 67
+ )
+);
__m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpacklo_epi8
@@ -2234,6 +2790,29 @@ __m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B)
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_unpacklo_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked byte unpack-low.
+ match_v64qi( // Expected: even elements are 0, odd elements come from the last operand.
+ _mm512_maskz_unpacklo_epi8(
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // only odd-numbered mask bits are set
+ (__m512i)(__v64qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ -10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25,
+ -30, -31, -32, -33, -34, -35, -36, -37, -38, -39, -40, -41, -42, -43, -44, -45,
+ -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65
+ },
+ (__m512i)(__v64qs){ // the first half of each 16-element row shows up in the expected odd elements
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
+ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
+ }
+ ),
+ 0, -1, 0, -2, 0, -3, 0, -4, 0, -5, 0, -6, 0, -7, 0, -8,
+ 0, 20, 0, 21, 0, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 27,
+ 0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
+ 0, 60, 0, 61, 0, 62, 0, 63, 0, 64, 0, 65, 0, 66, 0, 67
+ )
+);
__m512i test_mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_unpacklo_epi16
@@ -2248,6 +2827,35 @@ __m512i test_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A,
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_unpacklo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the merge-masked 16-bit unpack-low.
+ match_v32hi( // Expected: even elements from the first operand, odd elements from the last operand.
+ _mm512_mask_unpacklo_epi16(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ (__m512i)(__v32hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 110, 111, 112, 113, 114, 115, 116, 117,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m512i)(__v32hi){ // the first four values of each 8-element row show up in the expected odd elements
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 210, 211, 212, 213, 214, 215, 216, 217,
+ 220, 221, 222, 223, 224, 225, 226, 227,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 1, 200, 3, 201, 5, 202, 7, 203,
+ 9, 210, 11, 211, 13, 212, 15, 213,
+ 17, 220, 19, 221, 21, 222, 23, 223,
+ 25, 230, 27, 231, 29, 232, 31, 233
+ )
+);
__m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_unpacklo_epi16
@@ -2256,6 +2864,30 @@ __m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B)
return _mm512_maskz_unpacklo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked 16-bit unpack-low.
+ match_v32hi( // Expected: even elements are 0, odd elements come from the last operand.
+ _mm512_maskz_unpacklo_epi16(
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ (__m512i)(__v32hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 110, 111, 112, 113, 114, 115, 116, 117,
+ 120, 121, 122, 123, 124, 125, 126, 127,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m512i)(__v32hi){ // the first four values of each 8-element row show up in the expected odd elements
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 210, 211, 212, 213, 214, 215, 216, 217,
+ 220, 221, 222, 223, 224, 225, 226, 227,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 0, 200, 0, 201,0, 202, 0, 203,
+ 0, 210, 0, 211,0, 212, 0, 213,
+ 0, 220, 0, 221,0, 222, 0, 223,
+ 0, 230, 0, 231,0, 232, 0, 233
+ )
+);
+
__m512i test_mm512_cvtepi8_epi16(__m256i __A) {
// CHECK-LABEL: test_mm512_cvtepi8_epi16
// CHECK: sext <32 x i8> %{{.*}} to <32 x i16>
@@ -2702,7 +3334,7 @@ TEST_CONSTEXPR(
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63
},
- (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
-0, -1, -2, -3, -4, -5, -6, -7,
-8, -9, -10, -11, -12, -13, -14, -15,
@@ -2733,7 +3365,7 @@ __m512i test_mm512_maskz_mov_epi8(__mmask64 __U, __m512i __A) {
TEST_CONSTEXPR(
match_v64qi(
_mm512_maskz_mov_epi8(
- (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
-0, -1, -2, -3, -4, -5, -6, -7,
-8, -9, -10, -11, -12, -13, -14, -15,
@@ -2823,6 +3455,33 @@ __m512i test_mm512_mask_set1_epi8(__m512i __O, __mmask64 __M, char __A) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_set1_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR( // Constexpr check of the merge-masked byte broadcast.
+ match_v64qi( // Expected: even elements from the first operand, odd elements hold the scalar -1.
+ _mm512_mask_set1_epi8(
+ (__m512i)(__v64qi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32,
+ 33, 34, 35, 36, 37, 38, 39, 40,
+ 41, 42, 43, 44, 45, 46, 47, 48,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // only odd-numbered mask bits are set
+ (char)-1
+ ),
+ 1, -1, 3, -1, 5, -1, 7, -1,
+ 9, -1, 11, -1, 13, -1, 15, -1,
+ 17, -1, 19, -1, 21, -1, 23, -1,
+ 25, -1, 27, -1, 29, -1, 31, -1,
+ 33, -1, 35, -1, 37, -1, 39, -1,
+ 41, -1, 43, -1, 45, -1, 47, -1,
+ 49, -1, 51, -1, 53, -1, 55, -1,
+ 57, -1, 59, -1, 61, -1, 63, -1
+ )
+);
+
__m512i test_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
// CHECK-LABEL: test_mm512_maskz_set1_epi8
@@ -2893,6 +3552,23 @@ __m512i test_mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_set1_epi8(__M, __A);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked byte broadcast.
+ match_v64qi( // Expected: even elements are 0, odd elements hold the scalar -1.
+ _mm512_maskz_set1_epi8(
+ (__mmask64)0xAAAAAAAAAAAAAAAA, // only odd-numbered mask bits are set
+ (char)-1
+ ),
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1,
+ 0, -1, 0, -1,0, -1, 0, -1
+ )
+);
+
__mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
// CHECK-LABEL: test_mm512_kunpackd
@@ -3081,7 +3757,7 @@ TEST_CONSTEXPR(
48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63
},
- (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m128i)(__v16qs){
-120, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15
@@ -3109,7 +3785,7 @@ __m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) {
TEST_CONSTEXPR(
match_v64qi(
_mm512_maskz_broadcastb_epi8(
- (__mmask64)0xAAAAAAAAAAAAAAAAul,
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m128i)(__v16qs){
-120, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15
@@ -3218,6 +3894,24 @@ __m512i test_mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_set1_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR( // Constexpr check of the merge-masked 16-bit broadcast.
+ match_v32hi( // Expected: even elements from the first operand, odd elements hold the scalar -1.
+ _mm512_mask_set1_epi16(
+ (__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ -1
+ ),
+ 1, -1, 3,-1, 5, -1, 7, -1,
+ 9, -1, 11, -1, 13, -1, 15, -1,
+ 17, -1, 19, -1, 21, -1, 23, -1,
+ 25, -1, 27, -1, 29, -1, 31, -1
+ )
+);
__m512i test_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
// CHECK-LABEL: test_mm512_maskz_set1_epi16
@@ -3256,6 +3950,19 @@ __m512i test_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_set1_epi16(__M, __A);
}
+TEST_CONSTEXPR( // Constexpr check of the zero-masked 16-bit broadcast.
+ match_v32hi( // Expected: even elements are 0, odd elements hold the scalar -1.
+ _mm512_maskz_set1_epi16(
+ (__mmask32)0xAAAAAAAAu, // only odd-numbered mask bits are set
+ -1
+ ),
+ 0, -1,0, -1, 0, -1, 0, -1,
+ 0, -1,0, -1, 0, -1, 0, -1,
+ 0, -1,0, -1, 0, -1, 0, -1,
+ 0, -1,0, -1, 0, -1, 0, -1
+ )
+);
+
__m512i test_mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.512
>From bab493ee8951aba003ae81e14f3bfb28659eb09f Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 8 Oct 2025 21:37:33 +0100
Subject: [PATCH 3/6] [HEADERS][X86] Tidied up some tests
---
clang/test/CodeGen/X86/avx512bw-builtins.c | 1188 +++++++++-----------
1 file changed, 547 insertions(+), 641 deletions(-)
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index e8e8698a60e6e..1b36a8009d345 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1066,33 +1066,19 @@ __m512i test_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_packs_epi32(__M,__A,__B);
}
-TEST_CONSTEXPR(
- match_v32hi(
- _mm512_maskz_packs_epi32(
- (__mmask32)0xAAAAAAAAu,
- (__m512i)(__v16si){
- 50000, 50000, 50000, 50000,
- 50000, 50000, 50000, 50000,
- 50000, 50000, 50000, 50000,
- 50000, 50000, 50000, 50000
- },
- (__m512i)(__v16si){
- -50000, -50000, -50000, -50000,
- -50000, -50000, -50000, -50000,
- -50000, -50000, -50000, -50000,
- -50000, -50000, -50000, -50000
- }
- ),
- 0, 32767, 0, 32767,
- 0, -32768, 0, -32768,
- 0, 32767, 0, 32767,
- 0, -32768, 0, -32768,
- 0, 32767, 0, 32767,
- 0, -32768, 0, -32768,
- 0, 32767, 0, 32767,
- 0, -32768, 0, -32768
- )
-);
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_packs_epi32( // Zero-masked signed pack: even expected elements are 0.
+(__mmask32)0xAAAAAAAA, // only odd-numbered mask bits are set
+(__m512i)(__v16si){ // mixes values inside and outside the 16-bit signed range
+40000, -50000, 32767, -32768, 70000, -70000, 42, -42,
+0, 1, -1, 30000, 32768, -32769, 65535, -65536
+}, (__m512i)(__v16si){
+0, 1, -1, 65536, -1000000, 1000000, 32768, -32769,
+123456, -123456, 32767, -32768, 22222, -22222, 40000, -40000
+}),
+ 0, -32768, 0, -32768, 0, 1,0, 32767,
+ 0, -32768, 0, -42, 0, 32767,0, -32768,
+ 0, 1, 0, 30000, 0, -32768,0, -32768,
+ 0, -32768, 0, -32768, 0, -22222,0, -32768));
__m512i test_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_packs_epi32
@@ -1100,39 +1086,28 @@ __m512i test_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_packs_epi32(__W,__M,__A,__B);
}
-TEST_CONSTEXPR(
- match_v32hi(
- _mm512_mask_packs_epi32(
- (__m512i)(__v32hi){
- 1, 2, 3, 4, 5, 6, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32
- },
- (__mmask32)0xAAAAAAAAu,
- (__m512i)(__v16si){
- 50000, 50000, 50000, 50000,
- 50000, 50000, 50000, 50000,
- 50000, 50000, 50000, 50000,
- 50000, 50000, 50000, 50000
- },
- (__m512i)(__v16si){
- -50000, -50000, -50000, -50000,
- -50000, -50000, -50000, -50000,
- -50000, -50000, -50000, -50000,
- -50000, -50000, -50000, -50000
- }
- ),
- 1, 32767, 3, 32767,
- 5, -32768, 7, -32768,
- 9, 32767, 11, 32767,
- 13, -32768, 15, -32768,
- 17, 32767, 19, 32767,
- 21, -32768, 23, -32768,
- 25, 32767, 27, 32767,
- 29, -32768, 31, -32768
- )
-);
+
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_packs_epi32( // Merge-masked signed pack: even expected elements equal the first operand.
+(__m512i)(__v32hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+},
+(__mmask32)0xAAAAAAAA, // only odd-numbered mask bits are set
+(__m512i)(__v16si){ // mixes values inside and outside the 16-bit signed range
+40000, -50000, 32767, -32768, 70000, -70000, 42, -42,
+0, 1, -1, 30000, 32768, -32769, 65535, -65536
+}, (__m512i)(__v16si){
+0, 1, -1, 65536, -1000000, 1000000, 32768, -32769,
+123456, -123456, 32767, -32768, 22222, -22222, 40000, -40000
+}),
+ 1, -32768, 3, -32768, 5, 1, 7, 32767,
+ 9, -32768, 11, -42, 13, 32767, 15, -32768,
+ 17, 1, 19, 30000, 21, -32768, 23, -32768,
+ 25, -32768, 27, -32768, 29, -22222, 31, -32768));
+
+
__m512i test_mm512_packs_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packs_epi16
@@ -1161,34 +1136,25 @@ TEST_CONSTEXPR(
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v32hi){
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767
- },
+ 130, -200, 127, -128, 300, -1000, 42, -42,
+32767, -32767, 127, -128, 30000, -30000, 90, -90,
+ 130, -200, 0, -1, 126, -127, 128, -129,
+ 500, -500, 7, -7, 255, -255, 127, -128
+},
(__m512i)(__v32hi){
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767
- }
- ),
- 1, 127, 3, 127, 5, 127, 7, 127,
- 9, -128, 11, -128, 13, -128, 15, -128,
- 17, 127, 19, 127, 21, 127, 23, 127,
- 25, -128, 27, -128, 29, -128, 31, -128,
- 33, 127, 35, 127, 37, 127, 39, 127,
- 41, -128, 43, -128, 45, -128, 47, -128,
- 49, 127, 51, 127, 53, 127, 55, 127,
- 57, -128, 59, -128, 61, -128, 63, -128
+0, 1, -1, 255, -129, 128, 20000, -32768,
+5, -5, 100, -100, 127, -128, 512, -512,
+1, 2, -2, 300, -300, 127, -128, 42,
+0, 1, -1, 127, -128, 90, -90, -32768
+}),
+ 1, -128, 3, -128, 5, -128, 7, -42,
+ 9, 1, 11, 127, 13, 127, 15, -128,
+17, -128, 19, -128, 21, -128,23, -90,
+25, -5, 27, -100, 29, -128,31, -128,
+33, -128, 35, -1, 37, -127,39, -128,
+41, 2, 43, 127, 45, 127, 47, 42,
+49, -128, 51, -7, 53, -128,55, -128,
+57, 1, 59, 127, 61, 90, 63, -128
)
);
@@ -1203,34 +1169,25 @@ TEST_CONSTEXPR(
_mm512_maskz_packs_epi16(
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v32hi){
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767
- },
+ 130, -200, 127, -128, 300, -1000, 42, -42,
+32767, -32767, 127, -128, 30000, -30000, 90, -90,
+ 130, -200, 0, -1, 126, -127, 128, -129,
+ 500, -500, 7, -7, 255, -255, 127, -128
+},
(__m512i)(__v32hi){
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767
- }
- ),
- 0, 127, 0, 127, 0, 127, 0, 127,
- 0, -128, 0, -128, 0, -128, 0, -128,
- 0, 127, 0, 127, 0, 127, 0, 127,
- 0, -128, 0, -128, 0, -128, 0, -128,
- 0, 127, 0, 127, 0, 127, 0, 127,
- 0, -128, 0, -128, 0, -128, 0, -128,
- 0, 127, 0, 127, 0, 127, 0, 127,
- 0, -128, 0, -128, 0, -128, 0, -128
+0, 1, -1, 255, -129, 128, 20000, -32768,
+5, -5, 100, -100, 127, -128, 512, -512,
+1, 2, -2, 300, -300, 127, -128, 42,
+0, 1, -1, 127, -128, 90, -90, -32768
+}),
+0, -128,0, -128,0, -128,0, -42,
+0, 1,0, 127,0, 127, 0, -128,
+0, -128,0, -128,0, -128,0, -90,
+0, -5,0, -100,0, -128,0, -128,
+0, -128,0, -1,0, -127,0, -128,
+0, 2,0, 127,0, 127, 0, 42,
+0, -128,0, -7,0, -128,0, -128,
+0, 1,0, 127,0, 90, 0, -128
)
);
@@ -1246,33 +1203,20 @@ __m512i test_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_packus_epi32(__M,__A,__B);
}
-TEST_CONSTEXPR(
- match_v32hu(
- _mm512_maskz_packus_epi32(
- (__mmask32)0xAAAAAAAAu,
- (__m512i)(__v16si){
- 70000, 70000, 70000, 70000,
- 70000, 70000, 70000, 70000,
- 70000, 70000, 70000, 70000,
- 70000, 70000, 70000, 70000
- },
- (__m512i)(__v16si){
- -70000, -70000, -70000, -70000,
- -70000, -70000, -70000, -70000,
- -70000, -70000, -70000, -70000,
- -70000, -70000, -70000, -70000
- }
- ),
- 0, 65535, 0, 65535,
- 0, 0, 0, 0,
- 0, 65535, 0, 65535,
- 0, 0, 0, 0,
- 0, 65535, 0, 65535,
- 0, 0, 0, 0,
- 0, 65535, 0, 65535,
- 0, 0, 0, 0
- )
-);
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_packus_epi32( // Zero-masked unsigned pack, compared as signed 16-bit elements.
+ (__mmask32)0xAAAAAAAA, // only odd-numbered mask bits are set
+(__m512i)(__v16si){
+40000, -50000, 32767, -32768, 70000, -70000, 42, -42,
+0, 1, -1, 65535, 32768, -32769, 22222, -22222
+}, (__m512i)(__v16si){
+0, 1, -1, 65536, -1000000, 1000000, 32768, -32769,
+123456, -123456, 32767, -32768, 40000, -40000, 65535, 0
+}),
+0, 0,0, 0,0, 1,0, -1, // -1 here is the signed view of 0xFFFF (65535)
+0, 0,0, 0,0, -1,0, 0,
+0, 1,0, -1,0, 0,0, 0,
+0, 0,0, 0,0, 0,0, 0
+));
__m512i test_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
@@ -1281,39 +1225,27 @@ __m512i test_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_packus_epi32(__W,__M,__A,__B);
}
-TEST_CONSTEXPR(
- match_v32hu(
- _mm512_mask_packus_epi32(
+
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_packus_epi32(
(__m512i)(__v32hi){
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask32)0xAAAAAAAAu,
- (__m512i)(__v16si){
- 70000, 70000, 70000, 70000,
- 70000, 70000, 70000, 70000,
- 70000, 70000, 70000, 70000,
- 70000, 70000, 70000, 70000
- },
- (__m512i)(__v16si){
- -70000, -70000, -70000, -70000,
- -70000, -70000, -70000, -70000,
- -70000, -70000, -70000, -70000,
- -70000, -70000, -70000, -70000
- }
- ),
- 1, 65535, 3, 65535,
- 5, 0, 7, 0,
- 9, 65535, 11, 65535,
- 13, 0, 15, 0,
- 17, 65535, 19, 65535,
- 21, 0, 23, 0,
- 25, 65535, 27, 65535,
- 29, 0, 31, 0
- )
-);
+ (__mmask32)0xAAAAAAAA,
+(__m512i)(__v16si){
+40000, -50000, 32767, -32768, 70000, -70000, 42, -42,
+0, 1, -1, 65535, 32768, -32769, 22222, -22222
+}, (__m512i)(__v16si){
+0, 1, -1, 65536, -1000000, 1000000, 32768, -32769,
+123456, -123456, 32767, -32768, 40000, -40000, 65535, 0
+}),
+ 1, 0, 3, 0, 5, 1, 7, -1,
+ 9, 0,11, 0, 13, -1, 15, 0,
+17, 1,19, -1, 21, 0, 23, 0,
+25, 0,27, 0, 29, 0, 31, 0
+));
__m512i test_mm512_packus_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_packus_epi16
@@ -1327,9 +1259,8 @@ __m512i test_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_packus_epi16(__W,__M,__A,__B);
}
-TEST_CONSTEXPR(
- match_v64qu(
- _mm512_mask_packus_epi16(
+
+TEST_CONSTEXPR(match_v64qi(_mm512_mask_packus_epi16(
(__m512i)(__v64qu){
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
@@ -1341,37 +1272,26 @@ TEST_CONSTEXPR(
57, 58, 59, 60, 61, 62, 63, 64
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
- (__m512i)(__v32hi){
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767
- },
- (__m512i)(__v32hi){
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767
- }
- ),
- 1, 255, 3,255, 5,255, 7, 255,
- 9, 0, 11, 0, 13, 0, 15, 0,
- 17, 255, 19,255, 21,255, 23, 255,
- 25, 0, 27, 0, 29, 0, 31, 0,
- 33, 255, 35,255, 37,255, 39, 255,
- 41, 0, 43, 0, 45, 0, 47, 0,
- 49, 255, 51,255, 53,255, 55, 255,
- 57, 0, 59, 0, 61, 0, 63, 0
- )
-);
+(__m512i)(__v32hi){
+-1, 0, 1, 127, 128, 255, 256, -200,
+300, 42, -42, 500, 20000, -32768, 129, -129,
+-1, 0, 1, 127, 128, 255, 256, -200,
+300, 42, -42, 500, 20000, -32768, 129, -129
+}, (__m512i)(__v32hi){
+0, 1, -1, 255, -129, 128, 20000, -32768,
+32767, -32767, 127, -128, 30000, -30000, 90, -90,
+0, 1, -1, 255, -129, 128, 20000, -32768,
+32767, -32767, 127, -128, 30000, -30000, 90, -90
+}),
+ 1, 0, 3, 127, 5, -1, 7, 0,
+ 9, 1, 11, -1, 13, -128, 15, 0,
+17, 42, 19, -1, 21, 0, 23, 0,
+25, 0, 27, 0, 29, 0, 31, 0,
+33, 0, 35, 127, 37, -1, 39, 0,
+41, 1, 43, -1, 45, -128, 47, 0,
+49, 42, 51, -1, 53, 0, 55, 0,
+57, 0, 59, 0, 61, 0, 63, 0
+));
__m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
@@ -1380,41 +1300,28 @@ __m512i test_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_packus_epi16(__M,__A,__B);
}
-TEST_CONSTEXPR(
- match_v64qu(
- _mm512_maskz_packus_epi16(
+TEST_CONSTEXPR(match_v64qi(_mm512_maskz_packus_epi16(
(__mmask64)0xAAAAAAAAAAAAAAAA,
- (__m512i)(__v32hi){
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767,
- 32767, 32767, 32767, 32767
- },
- (__m512i)(__v32hi){
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767,
- -32767, -32767, -32767, -32767
- }
- ),
- 0, 255, 0,255, 0,255, 0, 255,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 255, 0,255, 0,255, 0, 255,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 255, 0,255, 0,255, 0, 255,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 255, 0,255, 0,255, 0, 255,
- 0, 0, 0, 0, 0, 0, 0, 0
- )
-);
+(__m512i)(__v32hi){
+-1, 0, 1, 127, 128, 255, 256, -200,
+300, 42, -42, 500, 20000, -32768, 129, -129,
+-1, 0, 1, 127, 128, 255, 256, -200,
+300, 42, -42, 500, 20000, -32768, 129, -129
+}, (__m512i)(__v32hi){
+0, 1, -1, 255, -129, 128, 20000, -32768,
+32767, -32767, 127, -128, 30000, -30000, 90, -90,
+0, 1, -1, 255, -129, 128, 20000, -32768,
+32767, -32767, 127, -128, 30000, -30000, 90, -90
+}),
+0, 0, 0, 127,0, -1,0, 0,
+0, 1, 0, -1,0, -128,0, 0,
+0, 42, 0, -1,0, 0,0, 0,
+0, 0, 0, 0,0, 0,0, 0,
+0, 0, 0, 127,0, -1,0, 0,
+0, 1, 0, -1,0, -128,0, 0,
+0, 42, 0, -1,0, 0,0, 0,
+0, 0, 0, 0,0, 0,0, 0
+));
__m512i test_mm512_adds_epi8(__m512i __A, __m512i __B) {
@@ -1430,8 +1337,8 @@ __m512i test_mm512_mask_adds_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_adds_epi8(__W,__U,__A,__B);
}
-TEST_CONSTEXPR(
- match_v64qi(
+
+TEST_CONSTEXPR(match_v64qi(
_mm512_mask_adds_epi8(
(__m512i)(__v64qs){
1, 2, 3, 4, 5, 6, 7, 8,
@@ -1444,38 +1351,36 @@ TEST_CONSTEXPR(
57, 58, 59, 60, 61, 62, 63, 64
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
- (__m512i)(__v64qs){
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64
- },
- (__m512i)(__v64qs){
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64
- }
- ),
- 1, 126, 3, 0, 5, 127, 7, 0,
- 9, 126, 11, 0, 13, 127, 15, 0,
- 17, 126, 19, 0, 21, 127, 23, 0,
- 25, 126, 27, 0, 29, 127, 31, 0,
- 33, 126, 35, 0, 37, 127, 39, 0,
- 41, 126, 43, 0, 45, 127, 47, 0,
- 49, 126, 51, 0, 53, 127, 55, 0,
- 57, 126, 59, 0, 61, 127, 63, 0
- )
-);
-
+ (__m512i)(__v64qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, -28, +29, -30, +31,
+ -32, +33, -34, +35, -36, +37, -38, +39,
+ -40, +41, -42, +43, -44, +45, -46, +47,
++100, +50, -100, +20, +80, -50, +120, -20,
+-100, -50, +100, -20, -80, +50, -120, +20
+},
+ (__m512i)(__v64qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, -28, +29, -30, +31,
+ -32, +33, -34, +35, -36, +37, -38, +39,
+ -40, +41, -42, +43, -44, +45, -46, +47,
+ +50, +80, -50, +110, +60, -30, +20, -10,
+ +50, +80, -50, +110, +60, -30, +20, -10
+ }
+ ),
+ 1, +2, 3, +6, 5, +10, 7, +14,
+ 9, +18, 11, +22, 13, +26, 15, +30,
+ 17, +34, 19, +38, 21, +42, 23, +46,
+ 25, +50, 27, +54, 29, +58, 31, +62,
+ 33, +66, 35, +70, 37, +74, 39, +78,
+ 41, +82, 43, +86, 45, +90, 47, +94,
+ 49, +127, 51, +127, 53, -80, +55, -30,
+ 57, +30, 59, +90, 61, +20, 63, +10
+));
__m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
@@ -1484,48 +1389,65 @@ __m512i test_mm512_maskz_adds_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_adds_epi8(__U,__A,__B);
}
-TEST_CONSTEXPR(
- match_v64qi(
+TEST_CONSTEXPR(match_v64qi(
_mm512_maskz_adds_epi8(
- (__mmask64)0xFFFFFFFFFFFFFFFFu,
- (__m512i)(__v64qs){
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64,
- 127, 63, 1, 0, 1, 64, -127, 64
- },
- (__m512i)(__v64qs){
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64,
- 127, 63, 1, 0, -127, 64, -127, -64
- }
- ),
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0,
- 127, 126, 2, 0, -126, 127, -128, 0
- )
-);
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__m512i)(__v64qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, -28, +29, -30, +31,
+ -32, +33, -34, +35, -36, +37, -38, +39,
+ -40, +41, -42, +43, -44, +45, -46, +47,
++100, +50, -100, +20, +80, -50, +120, -20,
+-100, -50, +100, -20, -80, +50, -120, +20
+},
+ (__m512i)(__v64qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, -28, +29, -30, +31,
+ -32, +33, -34, +35, -36, +37, -38, +39,
+ -40, +41, -42, +43, -44, +45, -46, +47,
+ +50, +80, -50, +110, +60, -30, +20, -10,
+ +50, +80, -50, +110, +60, -30, +20, -10
+ }
+ ),
+ 0, +2, 0, +6, 0, +10, 0, +14,
+ 0, +18, 0, +22, 0, +26, 0, +30,
+ 0, +34, 0, +38, 0, +42, 0, +46,
+ 0, +50, 0, +54, 0, +58, 0, +62,
+ 0, +66, 0, +70, 0, +74, 0, +78,
+ 0, +82, 0, +86, 0, +90, 0, +94,
+ 0, +127, 0, +127, 0, -80, 0, -30,
+ 0, +30, 0, +90, 0, +20, 0, +10
+));
__m512i test_mm512_adds_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epi16
// CHECK: @llvm.sadd.sat.v32i16
return _mm512_adds_epi16(__A,__B);
}
-TEST_CONSTEXPR(match_v32hi(_mm512_adds_epi16((__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +32000, -32000, +32000, -32000}, (__m512i)(__v32hi){0, +1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, +800, -800, -800, +800}), 0, +2, -4, +6, -8, +10, -12, +14, -16, +18, -20, +22, -24, +26, -28, +30, -32, +34, -36, +38, -40, +42, -44, +46, -48, +50, -52, +54, +32767, -32768, +31200, -31200));
+
+TEST_CONSTEXPR(
+ match_v32hi(
+ _mm512_adds_epi16(
+(__m512i)(__v32hi){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, +32000, -32000, +32000, -32000
+},
+(__m512i)(__v32hi){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, +800, -800, -800, +800}),
+ 0, +2, - 4, +6, -8, +10, -12, +14,
+-16, +18, -20, +22, -24, +26, -28, +30,
+-32, +34, -36, +38, -40, +42, -44, +46,
+-48, +50, -52, +54, +32767, -32768, +31200, -31200));
+
__m512i test_mm512_mask_adds_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_adds_epi16
@@ -1537,29 +1459,29 @@ TEST_CONSTEXPR(
match_v32hi(
_mm512_mask_adds_epi16(
(__m512i)(__v32hi){
- 1, 2, 3, 4, 5, 6, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
},
(__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hi){
- 30000, 100, -20000, 15000, -32768, 16384, 20000, -100,
- 30000, 100, -20000, 15000, -32768, 16384, 20000, -100,
- 30000, 100, -20000, 15000, -32768, 16384, 20000, -100,
- 30000, 100, -20000, 15000, -32768, 16384, 20000, -100
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, - 14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, +32000, -32000, +32000, +32000
},
(__m512i)(__v32hi){
- 30000, 200, -20000, 20000, -1, 20000, 20000, 50,
- 30000, 200, -20000, 20000, -1, 20000, 20000, 50,
- 30000, 200, -20000, 20000, -1, 20000, 20000, 50,
- 30000, 200, -20000, 20000, -1, 20000, 20000, 50
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, +800, -800, -800, +800
}
),
- 1, 300, 3, 32767, 5, 32767, 7, -50,
- 9, 300, 11, 32767, 13, 32767, 15, -50,
- 17, 300, 19, 32767, 21, 32767, 23, -50,
- 25, 300, 27, 32767, 29, 32767, 31, -50
+ 1, +2, 3, +6, 5, +10, 7, +14,
+ 9, +18, 11, +22, 13, +26, 15, +30,
+ 17, +34, 19, +38, 21, +42, 23, +46,
+ 25, +50, 27, +54, 29, -32768, 31, +32767
)
);
@@ -1575,25 +1497,25 @@ return _mm512_maskz_adds_epi16(__U,__A,__B);
TEST_CONSTEXPR(
match_v32hi(
_mm512_maskz_adds_epi16(
- (__mmask32)0xFFFFFFFFu,
+ (__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hi){
- 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384,
- 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384,
- 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384,
- 30000, -20000, 1000, -20000, 15000, 200, -32768, 16384
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, - 14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, +32000, -32000, +32000, +32000
},
(__m512i)(__v32hi){
- 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384,
- 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384,
- 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384,
- 30000, -20000, 2000, 10000, 20000, 1000, -1, 16384
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ -16, +17, -18, +19, -20, +21, -22, +23,
+ -24, +25, -26, +27, +800, -800, -800, +800
}
),
-
- 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767,
- 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767,
- 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767,
- 32767, -32768, 3000, -10000, 32767, 1200, -32768, 32767
+ 0, +2, 0, +6, 0, +10, 0, +14,
+ 0, +18, 0, +22, 0, +26, 0, +30,
+ 0, +34, 0, +38, 0, +42, 0, +46,
+ 0, +50, 0, +54, 0, -32768, 0, +32767
+
)
);
@@ -1612,10 +1534,7 @@ __m512i test_mm512_mask_adds_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_adds_epu8(__W,__U,__A,__B);
}
-TEST_CONSTEXPR(
- match_v64qu(
- _mm512_mask_adds_epu8(
- (__m512i)(__v64qu){
+TEST_CONSTEXPR(match_v64qu(_mm512_mask_adds_epu8((__m512i)(__v64qu){
1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24,
@@ -1627,36 +1546,33 @@ TEST_CONSTEXPR(
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qu){
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255
- },
- (__m512i)(__v64qu){
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- }
- ),
- 1, 201, 3, 150, 5, 200, 7, 255,
- 9, 201, 11, 150, 13, 200, 15, 255,
- 17, 201, 19, 150, 21, 200, 23, 255,
- 25, 201, 27, 150, 29, 200, 31, 255,
- 33, 201, 35, 150, 37, 200, 39, 255,
- 41, 201, 43, 150, 45, 200, 47, 255,
- 49, 201, 51, 150, 53, 200, 55, 255,
- 57, 201, 59, 150, 61, 200, 63, 255
- )
-);
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ +63, +63, +63, +63, +63, +63, +63, +63,
+ +64, +64, +64, +64, +64, +64, +64, +64,
++127, +127, +127, +127, +127, +127, +127, +127,
++128, +128, +128, +128, +128, +128, +128, +128,
++191, +191, +191, +191, +191, +191, +191, +191,
++192, +192, +192, +192, +192, +192, +192, +192,
++255, +255, +255, +255, +255, +255, +255, +255
+}, (__m512i)(__v64qu){
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255}),
+
+ 1, +63, 3, +127, 5, +191, 7, +255,
+ 9, +126, 11, +190, 13, +254, 15, +255,
+17, +127, 19, +191, 21, +255, 23, +255,
+25, +190, 27, +254, 29, +255, 31, +255,
+33, +191, 35, +255, 37, +255, 39, +255,
+41, +254, 43, +255, 45, +255, 47, +255,
+49, +255, 51, +255, 53, +255, 55, +255,
+57, +255, 59, +255, 61, +255, 63, +255
+));
__m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epu8
@@ -1665,51 +1581,36 @@ __m512i test_mm512_maskz_adds_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_adds_epu8(__U,__A,__B);
}
-TEST_CONSTEXPR(
- match_v64qu(
- _mm512_mask_adds_epu8(
- (__m512i)(__v64qu){
- 1, 2, 3, 4, 5, 6, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32,
- 33, 34, 35, 36, 37, 38, 39, 40,
- 41, 42, 43, 44, 45, 46, 47, 48,
- 49, 50, 51, 52, 53, 54, 55, 56,
- 57, 58, 59, 60, 61, 62, 63, 64
- },
- (__mmask64)0xFFFFFFFFFFFFFFFFu,
- (__m512i)(__v64qu){
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255,
- 0, 1, 10, 50, 100, 150, 200, 255
- },
+TEST_CONSTEXPR(match_v64qu(_mm512_maskz_adds_epu8(
+ (__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qu){
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- 255, 200, 150, 100, 80, 50, 20, 10,
- }
- ),
- 255, 201, 160, 150, 180, 200,220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255,
- 255, 201, 160, 150, 180, 200, 220, 255
- )
-);
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ +63, +63, +63, +63, +63, +63, +63, +63,
+ +64, +64, +64, +64, +64, +64, +64, +64,
++127, +127, +127, +127, +127, +127, +127, +127,
++128, +128, +128, +128, +128, +128, +128, +128,
++191, +191, +191, +191, +191, +191, +191, +191,
++192, +192, +192, +192, +192, +192, +192, +192,
++255, +255, +255, +255, +255, +255, +255, +255
+}, (__m512i)(__v64qu){
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255}),
+
+0, +63,0, +127, 0, +191, 0, +255,
+0, +126,0, +190, 0, +254, 0, +255,
+0, +127,0, +191, 0, +255, 0, +255,
+0, +190,0, +254, 0, +255, 0, +255,
+0, +191,0, +255, 0, +255, 0, +255,
+0, +254,0, +255, 0, +255, 0, +255,
+0, +255,0, +255, 0, +255, 0, +255,
+0, +255,0, +255, 0, +255, 0, +255
+));
__m512i test_mm512_adds_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_adds_epu16
@@ -1728,34 +1629,34 @@ __m512i test_mm512_mask_adds_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m5
}
TEST_CONSTEXPR(
- match_v32hu(
- _mm512_mask_adds_epu16(
- (__m512i)(__v32hu){
- 1, 2, 3, 4, 5, 6, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32
- },
- (__mmask32)0xAAAAAAAAu,
- (__m512i)(__v32hu){
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100
- },
- (__m512i)(__v32hu){
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50
- }
- ),
- 1, 300, 3, 65535, 5, 65535, 7, 150,
- 9, 300, 11, 65535, 13, 65535, 15, 150,
- 17, 300, 19, 65535, 21, 65535, 23, 150,
- 25, 300, 27, 65535, 29, 65535, 31, 150
- )
-);
+match_v32hu(
+_mm512_mask_adds_epu16(
+(__m512i)(__v32hu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 25, 26, 27, 28, 29, 30, 31, 32
+},
+(__mmask32)0xAAAAAAAA,
+(__m512i)(__v32hu){
+0, 0, 0, 0, +16384, +16384, +16384, +16384,
++16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767,
++32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152,
++49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535
+},
+(__m512i)(__v32hu){
+ 0, +32767, +32768, +65535, 0, +16384, +32767, +32768,
++49152, +65535, 0, +16384, +32767, +32768, +49152, +65535,
+ 0, +16384, +32767, +32768, +49152, +65535, 0, +16384,
++32767, +32768, +49152, +65535, 0, +32767, +32768, +65535
+}),
+
+ 1, +32767, 3, +65535, 5, +32768, 7, +49152,
+ 9, +65535, 11, +49151, 13, +65535, 15, +65535,
+17, +49152, 19, +65535, 21, +65535, 23, +65535,
+25, +65535, 27, +65535, 29, +65535, 31, +65535
+));
+
__m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_adds_epu16
@@ -1765,28 +1666,27 @@ __m512i test_mm512_maskz_adds_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
return _mm512_maskz_adds_epu16(__U,__A,__B);
}
TEST_CONSTEXPR(
- match_v32hu(
- _mm512_maskz_adds_epu16(
- (__mmask32)0xFFFFFFFFu,
- (__m512i)(__v32hu){
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100,
- 30000, 100, 20000, 45000, 65534, 60000, 20000, 100
- },
- (__m512i)(__v32hu){
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50,
- 30000, 200, 20000, 30000, 1, 20000, 20000, 50
- }
- ),
- 60000, 300, 40000, 65535, 65535, 65535, 40000, 150,
- 60000, 300, 40000, 65535, 65535, 65535, 40000, 150,
- 60000, 300, 40000, 65535, 65535, 65535, 40000, 150,
- 60000, 300, 40000, 65535, 65535, 65535, 40000, 150
- )
-);
+match_v32hu(
+_mm512_maskz_adds_epu16(
+(__mmask32)0xAAAAAAAA,
+(__m512i)(__v32hu){
+0, 0, 0, 0, +16384, +16384, +16384, +16384,
++16384, +16384, +32767, +32767, +32767, +32767, +32767, +32767,
++32768, +32768, +32768, +32768, +32768, +32768, +49152, +49152,
++49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535
+},
+(__m512i)(__v32hu){
+ 0, +32767, +32768, +65535, 0, +16384, +32767, +32768,
++49152, +65535, 0, +16384, +32767, +32768, +49152, +65535,
+ 0, +16384, +32767, +32768, +49152, +65535, 0, +16384,
++32767, +32768, +49152, +65535, 0, +32767, +32768, +65535
+}),
+
+0, +32767, 0, +65535,0, +32768, 0, +49152,
+0, +65535, 0, +49151,0, +65535, 0, +65535,
+0, +49152, 0, +65535,0, +65535, 0, +65535,
+0, +65535, 0, +65535,0, +65535, 0, +65535
+));
__m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
@@ -2088,34 +1988,34 @@ TEST_CONSTEXPR(
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
- 1, -100, 3, 4, 5, 6, 7, 8,
- 9, -100, 11, 12, 13, 14, 15, 16,
- 17, -100, 19, 20, 21, 22, 23, 24,
- 25, -100, 27, 28, 29, 30, 31, 32,
- 33, -100, 35, 36, 37, 38, 39, 40,
- 41, -100, 43, 44, 45, 46, 47, 48,
- 49, -100, 51, 52, 53, 54, 55, 56,
- 57, -100, 59, 60, 61, 62, 63, 64
+ 1, -100, 3, 4, 5, - 6, 7, 100,
+ 9, -100, 11, 12, 13, -14, 15, 100,
+ 17, -100, 19, 20, 21, -22, 23, 100,
+ 25, -100, 27, 28, 29, -30, 31, 100,
+ 33, -100, 35, 36, 37, -38, 39, 100,
+ 41, -100, 43, 44, 45, -46, 47, 100,
+ 49, -100, 51, 52, 53, -54, 55, 100,
+ 57, -100, 59, 60, 61, -62, 63, 100
},
(__m512i)(__v64qs){
- 1, 100, 3, 4, 5, 6, 7, 8,
- 9, 100, 11, 12, 13, 14, 15, 16,
- 17, 100, 19, 20, 21, 22, 23, 24,
- 25, 100, 27, 28, 29, 30, 31, 32,
- 33, 100, 35, 36, 37, 38, 39, 40,
- 41, 100, 43, 44, 45, 46, 47, 48,
- 49, 100, 51, 52, 53, 54, 55, 56,
- 57, 100, 59, 60, 61, 62, 63, 64
+ 1, 100, 3, 4, 5, 6, 7, -100,
+ 9, 100, 11, 12, 13, 14, 15, -100,
+ 17, 100, 19, 20, 21, 22, 23, -100,
+ 25, 100, 27, 28, 29, 30, 31, -100,
+ 33, 100, 35, 36, 37, 38, 39, -100,
+ 41, 100, 43, 44, 45, 46, 47, -100,
+ 49, 100, 51, 52, 53, 54, 55, -100,
+ 57, 100, 59, 60, 61, 62, 63, -100
}
),
- 1, -128, 3, 0, 5, 0, 7, 0,
- 9, -128, 11, 0, 13, 0, 15, 0,
- 17, -128, 19, 0, 21, 0, 23, 0,
- 25, -128, 27, 0, 29, 0, 31, 0,
- 33, -128, 35, 0, 37, 0, 39, 0,
- 41, -128, 43, 0, 45, 0, 47, 0,
- 49, -128, 51, 0, 53, 0, 55, 0,
- 57, -128, 59, 0, 61, 0, 63, 0
+ 1, -128, 3, 0, 5, -12, 7, 127,
+ 9, -128, 11, 0, 13, -28, 15, 127,
+ 17, -128, 19, 0, 21, -44, 23, 127,
+ 25, -128, 27, 0, 29, -60, 31, 127,
+ 33, -128, 35, 0, 37, -76, 39, 127,
+ 41, -128, 43, 0, 45, -92, 47, 127,
+ 49, -128, 51, 0, 53, -108, 55, 127,
+ 57, -128, 59, 0, 61, -124, 63, 127
)
);
@@ -2130,34 +2030,34 @@ TEST_CONSTEXPR(
_mm512_maskz_subs_epi8(
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
- 1, -100, 3, 4, 5, 6, 7, 8,
- 9, -100, 11, 12, 13, 14, 15, 16,
- 17, -100, 19, 20, 21, 22, 23, 24,
- 25, -100, 27, 28, 29, 30, 31, 32,
- 33, -100, 35, 36, 37, 38, 39, 40,
- 41, -100, 43, 44, 45, 46, 47, 48,
- 49, -100, 51, 52, 53, 54, 55, 56,
- 57, -100, 59, 60, 61, 62, 63, 64
+ 1, -100, 3, 4, 5, - 6, 7, 100,
+ 9, -100, 11, 12, 13, -14, 15, 100,
+ 17, -100, 19, 20, 21, -22, 23, 100,
+ 25, -100, 27, 28, 29, -30, 31, 100,
+ 33, -100, 35, 36, 37, -38, 39, 100,
+ 41, -100, 43, 44, 45, -46, 47, 100,
+ 49, -100, 51, 52, 53, -54, 55, 100,
+ 57, -100, 59, 60, 61, -62, 63, 100
},
(__m512i)(__v64qs){
- 1, 100, 3, 4, 5, 6, 7, 8,
- 9, 100, 11, 12, 13, 14, 15, 16,
- 17, 100, 19, 20, 21, 22, 23, 24,
- 25, 100, 27, 28, 29, 30, 31, 32,
- 33, 100, 35, 36, 37, 38, 39, 40,
- 41, 100, 43, 44, 45, 46, 47, 48,
- 49, 100, 51, 52, 53, 54, 55, 56,
- 57, 100, 59, 60, 61, 62, 63, 64
+ 1, 100, 3, 4, 5, 6, 7, -100,
+ 9, 100, 11, 12, 13, 14, 15, -100,
+ 17, 100, 19, 20, 21, 22, 23, -100,
+ 25, 100, 27, 28, 29, 30, 31, -100,
+ 33, 100, 35, 36, 37, 38, 39, -100,
+ 41, 100, 43, 44, 45, 46, 47, -100,
+ 49, 100, 51, 52, 53, 54, 55, -100,
+ 57, 100, 59, 60, 61, 62, 63, -100
}
),
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0,
- 0, -128, 0, 0, 0, 0, 0, 0
+ 0, -128,0, 0,0, -12,0, 127,
+ 0, -128,0, 0,0, -28,0, 127,
+ 0, -128,0, 0,0, -44,0, 127,
+ 0, -128,0, 0,0, -60,0, 127,
+ 0, -128,0, 0,0, -76,0, 127,
+ 0, -128,0, 0,0, -92,0, 127,
+ 0, -128,0, 0,0, -108,0, 127,
+ 0, -128,0, 0,0, -124,0, 127
)
);
@@ -2185,22 +2085,22 @@ TEST_CONSTEXPR(
},
(__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hi){
- 1, -30000, 3, 4, 5, 6, 7, 8,
- 9, -30000, 11, 12, 13, 14, 15, 16,
- 17, -30000, 19, 20, 21, 22, 23, 24,
- 25, -30000, 27, 28, 29, 30, 31, 32
+ 1, -30000, 3, 30000, 5, -6, 7, 8,
+ 9, -30000, 11, 30000, 13, -14, 15, 16,
+ 17, -30000, 19, 30000, 21, -22, 23, 24,
+ 25, -30000, 27, 30000, 29, -30, 31, 32
},
(__m512i)(__v32hi){
- 1, 30000, 3, 4, 5, 6, 7, 8,
- 9, 30000, 11, 12, 13, 14, 15, 16,
- 17, 30000, 19, 20, 21, 22, 23, 24,
- 25, 30000, 27, 28, 29, 30, 31, 32
+ 1, 30000, 3, -30000, 5, 6, 7, -8,
+ 9, 30000, 11, -30000, 13, 14, 15, -16,
+ 17, 30000, 19, -30000, 21, 22, 23, -24,
+ 25, 30000, 27, -30000, 29, 30, 31, -32
}
),
- 1, -32768, 3, 0, 5, 0, 7, 0,
- 9, -32768, 11, 0, 13, 0, 15, 0,
- 17, -32768, 19, 0, 21, 0, 23, 0,
- 25, -32768, 27, 0, 29, 0, 31, 0
+ 1, -32768, 3, 32767, 5, -12, 7, 16,
+ 9, -32768, 11, 32767, 13, -28, 15, 32,
+ 17, -32768, 19, 32767, 21, -44, 23, 48,
+ 25, -32768, 27, 32767, 29, -60, 31, 64
)
);
@@ -2215,22 +2115,22 @@ TEST_CONSTEXPR(
_mm512_maskz_subs_epi16(
(__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hi){
- 1, -30000, 3, 4, 5, 6, 7, 8,
- 9, -30000, 11, 12, 13, 14, 15, 16,
- 17, -30000, 19, 20, 21, 22, 23, 24,
- 25, -30000, 27, 28, 29, 30, 31, 32
+ 1, -30000, 3, 30000, 5, -6, 7, 8,
+ 9, -30000, 11, 30000, 13, -14, 15, 16,
+ 17, -30000, 19, 30000, 21, -22, 23, 24,
+ 25, -30000, 27, 30000, 29, -30, 31, 32
},
(__m512i)(__v32hi){
- 1, 30000, 3, 4, 5, 6, 7, 8,
- 9, 30000, 11, 12, 13, 14, 15, 16,
- 17, 30000, 19, 20, 21, 22, 23, 24,
- 25, 30000, 27, 28, 29, 30, 31, 32
+ 1, 30000, 3, -30000, 5, 6, 7, -8,
+ 9, 30000, 11, -30000, 13, 14, 15, -16,
+ 17, 30000, 19, -30000, 21, 22, 23, -24,
+ 25, 30000, 27, -30000, 29, 30, 31, -32
}
),
- 0, -32768, 0, 0, 0, 0, 0, 0,
- 0, -32768, 0, 0, 0, 0, 0, 0,
- 0, -32768, 0, 0, 0, 0, 0, 0,
- 0, -32768, 0, 0, 0, 0, 0, 0
+ 0, -32768, 0, 32767, 0, -12, 0, 16,
+ 0, -32768, 0, 32767, 0, -28, 0, 32,
+ 0, -32768, 0, 32767, 0, -44, 0, 48,
+ 0, -32768, 0, 32767, 0, -60, 0, 64
)
);
@@ -2247,8 +2147,9 @@ __m512i test_mm512_mask_subs_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m51
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_mask_subs_epu8(__W,__U,__A,__B);
}
+
TEST_CONSTEXPR(
- match_v64qi(
+ match_v64qu(
_mm512_mask_subs_epu8(
(__m512i)(__v64qu){
1, 2, 3, 4, 5, 6, 7, 8,
@@ -2262,34 +2163,34 @@ TEST_CONSTEXPR(
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qu){
- 1, 100, 3, 4, 5, 6, 7, 8,
- 9, 100, 11, 12, 13, 14, 15, 16,
- 17, 100, 19, 20, 21, 22, 23, 24,
- 25, 100, 27, 28, 29, 30, 31, 32,
- 33, 100, 35, 36, 37, 38, 39, 40,
- 41, 100, 43, 44, 45, 46, 47, 48,
- 49, 100, 51, 52, 53, 54, 55, 56,
- 57, 100, 59, 60, 61, 62, 63, 64
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255
},
(__m512i)(__v64qu){
- 1, 200, 3, 4, 5, 6, 7, 8,
- 9, 200, 11, 12, 13, 14, 15, 16,
- 17, 200, 19, 20, 21, 22, 23, 24,
- 25, 200, 27, 28, 29, 30, 31, 32,
- 33, 200, 35, 36, 37, 38, 39, 40,
- 41, 200, 43, 44, 45, 46, 47, 48,
- 49, 200, 51, 52, 53, 54, 55, 56,
- 57, 200, 59, 60, 61, 62, 63, 64
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255
}
),
- 1, 0, 3, 0, 5, 0, 7, 0,
- 9, 0, 11, 0, 13, 0, 15, 0,
- 17, 0, 19, 0, 21, 0, 23, 0,
- 25, 0, 27, 0, 29, 0, 31, 0,
- 33, 0, 35, 0, 37, 0, 39, 0,
- 41, 0, 43, 0, 45, 0, 47, 0,
- 49, 0, 51, 0, 53, 0, 55, 0,
- 57, 0, 59, 0, 61, 0, 63, 0
+ 1, 200, 3, 0, 5, 0, 7, 254,
+ 9, 0, 11, 1, 13, 1, 15, 0,
+ 17, 200,19, 0, 21, 0, 23, 254,
+ 25, 0,27, 1, 29, 1, 31, 0,
+ 33, 200,35, 0, 37, 0, 39, 254,
+ 41, 0,43, 1, 45, 1, 47, 0,
+ 49, 200,51, 0, 53, 0, 55, 254,
+ 57, 0,59, 1, 61, 1, 63, 0
)
);
@@ -2302,39 +2203,40 @@ __m512i test_mm512_maskz_subs_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
return _mm512_maskz_subs_epu8(__U,__A,__B);
}
+
TEST_CONSTEXPR(
- match_v64qi(
+ match_v64qu(
_mm512_maskz_subs_epu8(
(__mmask64)0xAAAAAAAAAAAAAAAA,
(__m512i)(__v64qu){
- 1, 100, 3, 4, 5, 6, 7, 8,
- 9, 100, 11, 12, 13, 14, 15, 16,
- 17, 100, 19, 20, 21, 22, 23, 24,
- 25, 100, 27, 28, 29, 30, 31, 32,
- 33, 100, 35, 36, 37, 38, 39, 40,
- 41, 100, 43, 44, 45, 46, 47, 48,
- 49, 100, 51, 52, 53, 54, 55, 56,
- 57, 100, 59, 60, 61, 62, 63, 64
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255, 0,
+ 0, 0, 1, 0, 100, 0, 255
},
(__m512i)(__v64qu){
- 1, 200, 3, 4, 5, 6, 7, 8,
- 9, 200, 11, 12, 13, 14, 15, 16,
- 17, 200, 19, 20, 21, 22, 23, 24,
- 25, 200, 27, 28, 29, 30, 31, 32,
- 33, 200, 35, 36, 37, 38, 39, 40,
- 41, 200, 43, 44, 45, 46, 47, 48,
- 49, 200, 51, 52, 53, 54, 55, 56,
- 57, 200, 59, 60, 61, 62, 63, 64
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255
}
),
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0
+ 0, 200,0, 0,0, 0,0, 254,
+ 0, 0,0, 1,0, 1,0, 0,
+ 0, 200,0, 0,0, 0,0, 254,
+ 0, 0,0, 1,0, 1,0, 0,
+ 0, 200,0, 0,0, 0,0, 254,
+ 0, 0,0, 1,0, 1,0, 0,
+ 0, 200,0, 0,0, 0,0, 254,
+ 0, 0,0, 1,0, 1,0, 0
)
);
@@ -2354,33 +2256,36 @@ __m512i test_mm512_mask_subs_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m5
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_subs_epu16(__W,__U,__A,__B);
}
+
TEST_CONSTEXPR(
match_v32hu(
_mm512_mask_subs_epu16(
(__m512i)(__v32hu){
- 1, 2, 3, 4, 5, 6, 7, 8,
- 9, 10, 11, 12, 13, 14, 15, 16,
- 17, 18, 19, 20, 21, 22, 23, 24,
- 25, 26, 27, 28, 29, 30, 31, 32
+ 101, 102, 103, 104, 105, 106, 107, 108,
+ 109, 110, 111, 112, 113, 114, 115, 116,
+ 117, 118, 119, 120, 121, 122, 123, 124,
+ 125, 126, 127, 128, 129, 130, 131, 132
},
(__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hu){
- 1, 30000, 3, 4, 5, 6, 7, 8,
- 9, 30000, 11, 12, 13, 14, 15, 16,
- 17, 30000, 19, 20, 21, 22, 23, 24,
- 25, 30000, 27, 28, 29, 30, 31, 32
+
+ 0, 65000, 0, 40000, 0, 100, 0, 65535,
+ 0, 0, 0, 1000, 0, 1, 0, 50000,
+ 0, 65000, 0, 40000, 0, 100, 0, 65535,
+ 0, 0, 0, 1000, 0, 1, 0, 50000
},
(__m512i)(__v32hu){
- 1, 60000, 3, 4, 5, 6, 7, 8,
- 9, 60000, 11, 12, 13, 14, 15, 16,
- 17, 60000, 19, 20, 21, 22, 23, 24,
- 25, 60000, 27, 28, 29, 30, 31, 32
+
+ 0, 5000, 0, 40000, 0, 200, 0, 1,
+ 0, 1, 0, 65535, 0, 0, 0, 25000,
+ 0, 5000, 0, 40000, 0, 200, 0, 1,
+ 0, 1, 0, 65535, 0, 0, 0, 25000
}
),
- 1, 0, 3, 0, 5, 0, 7, 0,
- 9, 0, 11, 0, 13, 0, 15, 0,
- 17, 0, 19, 0, 21, 0, 23, 0,
- 25, 0, 27, 0, 29, 0, 31, 0
+ 101, 60000, 103, 0, 105, 0, 107, 65534,
+ 109, 0, 111, 0, 113, 1, 115, 25000,
+ 117, 60000, 119, 0, 121, 0, 123, 65534,
+ 125, 0, 127, 0, 129, 1, 131, 25000
)
);
@@ -2391,27 +2296,28 @@ __m512i test_mm512_maskz_subs_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_subs_epu16(__U,__A,__B);
}
+
TEST_CONSTEXPR(
match_v32hu(
_mm512_maskz_subs_epu16(
(__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hu){
- 1, 30000, 3, 4, 5, 6, 7, 8,
- 9, 30000, 11, 12, 13, 14, 15, 16,
- 17, 30000, 19, 20, 21, 22, 23, 24,
- 25, 30000, 27, 28, 29, 30, 31, 32
+ 51, 65000, 0, 40000, 0, 100, 0, 65535,
+ 42, 0, 0, 1000, 0, 1, 0, 50000,
+ 69, 65000, 0, 40000, 0, 100, 0, 65535,
+ 71, 0, 0, 1000, 0, 1, 0, 50000
},
(__m512i)(__v32hu){
- 1, 60000, 3, 4, 5, 6, 7, 8,
- 9, 60000, 11, 12, 13, 14, 15, 16,
- 17, 60000, 19, 20, 21, 22, 23, 24,
- 25, 60000, 27, 28, 29, 30, 31, 32
+ 2652, 5000, 0, 40000, 0, 200, 0, 1,
+ 398, 1, 0, 65535, 0, 0, 0, 25000,
+ 29625, 5000, 0, 40000, 0, 200, 0, 1,
+ 25274, 1, 0, 65535, 0, 0, 0, 25000
}
),
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0
+ 0, 60000,0, 0, 0, 0,0, 65534,
+ 0, 0,0, 0, 0, 1,0, 25000,
+ 0, 60000,0, 0, 0, 0,0, 65534,
+ 0, 0,0, 0, 0, 1,0, 25000
)
);
@@ -3278,7 +3184,7 @@ TEST_CONSTEXPR(
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31
},
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xAAAAAAAA,
(__m512i)(__v32hi){
-0, -1, -2, -3, -4, -5, -6, -7,
-8, -9, -10, -11, -12, -13, -14, -15,
@@ -3301,7 +3207,7 @@ __m512i test_mm512_maskz_mov_epi16(__mmask32 __U, __m512i __A) {
TEST_CONSTEXPR(
match_v32hi(
_mm512_maskz_mov_epi16(
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xAAAAAAAA,
(__m512i)(__v32hi){
-0, -1, -2, -3, -4, -5, -6, -7,
-8, -9, -10, -11, -12, -13, -14, -15,
@@ -3469,16 +3375,16 @@ TEST_CONSTEXPR(
57, 58, 59, 60, 61, 62, 63, 64
},
(__mmask64)0xAAAAAAAAAAAAAAAA,
- (char)-1
+ (char)42
),
- 1, -1, 3, -1, 5, -1, 7, -1,
- 9, -1, 11, -1, 13, -1, 15, -1,
- 17, -1, 19, -1, 21, -1, 23, -1,
- 25, -1, 27, -1, 29, -1, 31, -1,
- 33, -1, 35, -1, 37, -1, 39, -1,
- 41, -1, 43, -1, 45, -1, 47, -1,
- 49, -1, 51, -1, 53, -1, 55, -1,
- 57, -1, 59, -1, 61, -1, 63, -1
+ 1, 42, 3, 42, 5, 42, 7, 42,
+ 9, 42, 11, 42, 13, 42, 15, 42,
+ 17, 42, 19, 42, 21, 42, 23, 42,
+ 25, 42, 27, 42, 29, 42, 31, 42,
+ 33, 42, 35, 42, 37, 42, 39, 42,
+ 41, 42, 43, 42, 45, 42, 47, 42,
+ 49, 42, 51, 42, 53, 42, 55, 42,
+ 57, 42, 59, 42, 61, 42, 63, 42
)
);
@@ -3556,16 +3462,16 @@ TEST_CONSTEXPR(
match_v64qi(
_mm512_maskz_set1_epi8(
(__mmask64)0xAAAAAAAAAAAAAAAA,
- (char)-1
+ (char)42
),
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1,
- 0, -1, 0, -1,0, -1, 0, -1
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42,
+ 0, 42, 0, 42, 0, 42, 0, 42
)
);
@@ -3824,7 +3730,7 @@ TEST_CONSTEXPR(
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, 31
},
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xAAAAAAAA,
(__m128i)(__v8hi){
-120, 1, 2, 3, 4, 5, 6, 7
}
@@ -3903,7 +3809,7 @@ TEST_CONSTEXPR(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xAAAAAAAA,
-1
),
1, -1, 3,-1, 5, -1, 7, -1,
@@ -3953,13 +3859,13 @@ __m512i test_mm512_maskz_set1_epi16(__mmask32 __M, short __A) {
TEST_CONSTEXPR(
match_v32hi(
_mm512_maskz_set1_epi16(
- (__mmask32)0xAAAAAAAAu,
- -1
+ (__mmask32)0xAAAAAAAA,
+ 42
),
- 0, -1,0, -1, 0, -1, 0, -1,
- 0, -1,0, -1, 0, -1, 0, -1,
- 0, -1,0, -1, 0, -1, 0, -1,
- 0, -1,0, -1, 0, -1, 0, -1
+ 0, 42,0, 42, 0, 42, 0, 42,
+ 0, 42,0, 42, 0, 42, 0, 42,
+ 0, 42,0, 42, 0, 42, 0, 42,
+ 0, 42,0, 42, 0, 42, 0, 42
)
);
From 8009be4023490dbd6cd27fd87b8fbb903d69db2c Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 8 Oct 2025 21:38:32 +0100
Subject: [PATCH 4/6] [HEADERS][X86] Tidied up some tests
---
clang/test/CodeGen/X86/avx512bw-builtins.c | 1 -
1 file changed, 1 deletion(-)
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 1b36a8009d345..fedcbb23f5b53 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -1482,7 +1482,6 @@ TEST_CONSTEXPR(
9, +18, 11, +22, 13, +26, 15, +30,
17, +34, 19, +38, 21, +42, 23, +46,
25, +50, 27, +54, 29, -32768, 31, +32767
-
)
);
From 2f8e8caadb31bcc07cdbf9f268ccc8d782004ee3 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Wed, 8 Oct 2025 23:08:38 +0100
Subject: [PATCH 5/6] [HEADERS][X86] Added AVX2 Mask constexpr tests
---
clang/lib/Headers/avx512vlbwintrin.h | 182 ++---
clang/test/CodeGen/X86/avx512bw-builtins.c | 4 +-
clang/test/CodeGen/X86/avx512vlbw-builtins.c | 775 ++++++++++++++++++-
3 files changed, 866 insertions(+), 95 deletions(-)
diff --git a/clang/lib/Headers/avx512vlbwintrin.h b/clang/lib/Headers/avx512vlbwintrin.h
index 5e6daa8f7b260..ae6b65b520479 100644
--- a/clang/lib/Headers/avx512vlbwintrin.h
+++ b/clang/lib/Headers/avx512vlbwintrin.h
@@ -536,14 +536,14 @@ _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) {
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
(__v8hi)_mm_packs_epi32(__A, __B),
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -551,7 +551,7 @@ _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
(__v8hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -559,7 +559,7 @@ _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -567,7 +567,7 @@ _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
(__v16hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -575,7 +575,7 @@ _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -583,7 +583,7 @@ _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
(__v16qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -591,7 +591,7 @@ _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -599,7 +599,7 @@ _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
(__v32qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -607,7 +607,7 @@ _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -615,7 +615,7 @@ _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
(__v8hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -623,7 +623,7 @@ _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -631,7 +631,7 @@ _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
(__v16hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -639,7 +639,7 @@ _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -647,7 +647,7 @@ _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
(__v16qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -655,7 +655,7 @@ _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -663,7 +663,7 @@ _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
(__v32qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -671,7 +671,7 @@ _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -679,7 +679,7 @@ _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -687,7 +687,7 @@ _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -695,7 +695,7 @@ _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -703,7 +703,7 @@ _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -711,7 +711,7 @@ _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -719,7 +719,7 @@ _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -727,7 +727,7 @@ _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -735,7 +735,7 @@ _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -743,7 +743,7 @@ _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -751,7 +751,7 @@ _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -759,7 +759,7 @@ _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -767,7 +767,7 @@ _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -775,7 +775,7 @@ _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -783,7 +783,7 @@ _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1099,7 +1099,7 @@ _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1107,7 +1107,7 @@ _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1115,7 +1115,7 @@ _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1123,7 +1123,7 @@ _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1131,7 +1131,7 @@ _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1139,7 +1139,7 @@ _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1147,7 +1147,7 @@ _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1155,7 +1155,7 @@ _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1163,7 +1163,7 @@ _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1171,7 +1171,7 @@ _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1179,7 +1179,7 @@ _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1187,7 +1187,7 @@ _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1195,7 +1195,7 @@ _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1203,7 +1203,7 @@ _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1211,7 +1211,7 @@ _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
__m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1219,7 +1219,7 @@ _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1444,14 +1444,14 @@ _mm_cvtepi16_epi8(__m128i __A) {
12, 13, 14, 15);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
(__v16qi) __O,
__M);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
(__v16qi) _mm_setzero_si128(),
@@ -1600,112 +1600,112 @@ _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
(__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_unpackhi_epi8(__A, __B),
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_unpackhi_epi8(__A, __B),
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_unpackhi_epi8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_unpackhi_epi16(__A, __B),
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_unpackhi_epi16(__A, __B),
(__v8hi) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_unpackhi_epi16(__A, __B),
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_unpackhi_epi16(__A, __B),
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_unpacklo_epi8(__A, __B),
(__v16qi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_unpacklo_epi8(__A, __B),
(__v16qi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_unpacklo_epi8(__A, __B),
(__v32qi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
(__v32qi)_mm256_unpacklo_epi8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_unpacklo_epi16(__A, __B),
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_unpacklo_epi16(__A, __B),
(__v8hi) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_unpacklo_epi16(__A, __B),
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_unpacklo_epi16(__A, __B),
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1713,7 +1713,7 @@ _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1721,7 +1721,7 @@ _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1729,7 +1729,7 @@ _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1738,7 +1738,7 @@ _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1746,7 +1746,7 @@ _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1754,7 +1754,7 @@ _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
(__v8hi)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1762,7 +1762,7 @@ _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
(__v16hi)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
{
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1889,7 +1889,7 @@ _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
(__v16hi)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1897,7 +1897,7 @@ _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
(__v8hi)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2185,7 +2185,7 @@ _mm256_maskz_mov_epi8(__mmask32 __U, __m256i __A) {
(__v32qi) _mm256_setzero_si256 ());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
{
return (__m128i) __builtin_ia32_selectb_128(__M,
@@ -2193,7 +2193,7 @@ _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
(__v16qi) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_set1_epi8 (__mmask16 __M, char __A)
{
return (__m128i) __builtin_ia32_selectb_128(__M,
@@ -2201,7 +2201,7 @@ _mm_maskz_set1_epi8 (__mmask16 __M, char __A)
(__v16qi) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
{
return (__m256i) __builtin_ia32_selectb_256(__M,
@@ -2209,7 +2209,7 @@ _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
(__v32qi) __O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
{
return (__m256i) __builtin_ia32_selectb_256(__M,
@@ -2544,7 +2544,7 @@ _mm256_movm_epi16 (__mmask16 __A)
return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
{
return (__m128i)__builtin_ia32_selectb_128(__M,
@@ -2552,7 +2552,7 @@ _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
(__v16qi) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
{
return (__m128i)__builtin_ia32_selectb_128(__M,
@@ -2560,7 +2560,7 @@ _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
(__v16qi) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
{
return (__m256i)__builtin_ia32_selectb_256(__M,
@@ -2568,7 +2568,7 @@ _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
(__v32qi) __O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
{
return (__m256i)__builtin_ia32_selectb_256(__M,
@@ -2576,7 +2576,7 @@ _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
(__v32qi) _mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
{
return (__m128i)__builtin_ia32_selectw_128(__M,
@@ -2584,7 +2584,7 @@ _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
(__v8hi) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
{
return (__m128i)__builtin_ia32_selectw_128(__M,
@@ -2592,7 +2592,7 @@ _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
(__v8hi) _mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
{
return (__m256i)__builtin_ia32_selectw_256(__M,
@@ -2600,7 +2600,7 @@ _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
(__v16hi) __O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
{
return (__m256i)__builtin_ia32_selectw_256(__M,
@@ -2608,7 +2608,7 @@ _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
(__v16hi) _mm256_setzero_si256());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
{
return (__m256i) __builtin_ia32_selectw_256 (__M,
@@ -2616,7 +2616,7 @@ _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
(__v16hi) __O);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
{
return (__m256i) __builtin_ia32_selectw_256(__M,
@@ -2624,7 +2624,7 @@ _mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
(__v16hi) _mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
{
return (__m128i) __builtin_ia32_selectw_128(__M,
@@ -2632,7 +2632,7 @@ _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
(__v8hi) __O);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_set1_epi16 (__mmask8 __M, short __A)
{
return (__m128i) __builtin_ia32_selectw_128(__M,
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index fedcbb23f5b53..40b46fffb80f9 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2082,7 +2082,7 @@ TEST_CONSTEXPR(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xAAAAAAAA,
(__m512i)(__v32hi){
1, -30000, 3, 30000, 5, -6, 7, 8,
9, -30000, 11, 30000, 13, -14, 15, 16,
@@ -2267,14 +2267,12 @@ TEST_CONSTEXPR(
},
(__mmask32)0xAAAAAAAAu,
(__m512i)(__v32hu){
-
0, 65000, 0, 40000, 0, 100, 0, 65535,
0, 0, 0, 1000, 0, 1, 0, 50000,
0, 65000, 0, 40000, 0, 100, 0, 65535,
0, 0, 0, 1000, 0, 1, 0, 50000
},
(__m512i)(__v32hu){
-
0, 5000, 0, 40000, 0, 200, 0, 1,
0, 1, 0, 65535, 0, 0, 0, 25000,
0, 5000, 0, 40000, 0, 200, 0, 1,
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 1fe1ec08ede88..9ec1cd0ebeda6 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -1075,12 +1075,36 @@ __m256i test_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_packs_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_packs_epi32(
+(__mmask16)0xAAAA,
+(__m256i)(__v8si){
+40000, -50000, 32767, -32768, 32768, -32769, 65535, -65536
+}, (__m256i)(__v8si){
+0, 1, -1, 65536, 22222, -22222, 40000, -40000
+}),
+ 0, -32768,0, -32768,0, 1,0, 32767,
+ 0, -32768,0, -32768,0, -22222,0, -32768));
+
__m256i test_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packs_epi32
// CHECK: @llvm.x86.avx2.packssdw
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_packs_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_packs_epi32(
+(__m256i)(__v16hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 25, 26, 27, 28, 29, 30, 31, 32
+},
+(__mmask16)0xAAAA,
+(__m256i)(__v8si){
+40000, -50000, 32767, -32768, 32768, -32769, 65535, -65536
+}, (__m256i)(__v8si){
+0, 1, -1, 65536, 22222, -22222, 40000, -40000
+}),
+ 1, -32768, 3, -32768, 5, 1, 7, 32767,
+ 25, -32768, 27, -32768, 29, -22222, 31, -32768));
+
__m128i test_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_packs_epi16
// CHECK: @llvm.x86.sse2.packsswb
@@ -1099,12 +1123,55 @@ __m256i test_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_packs_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_maskz_packs_epi16(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v16hi){
+ 130, -200, 127, -128, 300, -1000, 42, -42,
+ 500, -500, 7, -7, 255, -255, 127, -128
+},
+ (__m256i)(__v16hi){
+0, 1, -1, 255, -129, 128, 20000, -32768,
+0, 1, -1, 127, -128, 90, -90, -32768
+}),
+0, -128,0, -128,0, -128,0, -42,
+0, 1,0, 127,0, 127, 0, -128,
+0, -128,0, -7,0, -128,0, -128,
+0, 1,0, 127,0, 90, 0, -128
+ )
+);
+
__m256i test_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packs_epi16
// CHECK: @llvm.x86.avx2.packsswb
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_packs_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_mask_packs_epi16(
+ (__m256i)(__v32qs){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v16hi){
+ 130, -200, 127, -128, 300, -1000, 42, -42,
+ 500, -500, 7, -7, 255, -255, 127, -128
+},
+ (__m256i)(__v16hi){
+0, 1, -1, 255, -129, 128, 20000, -32768,
+0, 1, -1, 127, -128, 90, -90, -32768
+}),
+ 1, -128, 3, -128, 5, -128, 7, -42,
+ 9, 1, 11, 127, 13, 127, 15, -128,
+49, -128, 51, -7, 53, -128,55, -128,
+57, 1, 59, 127, 61, 90, 63, -128
+ )
+);
__m128i test_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packus_epi32
@@ -1126,6 +1193,17 @@ __m256i test_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_packus_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_packus_epi32(
+ (__mmask16)0xAAAA,
+(__m256i)(__v8si){
+40000, -50000, 32767, -32768, 32768, -32769, 22222, -22222
+}, (__m256i)(__v8si){
+0, 1, -1, 65536, 40000, -40000, 65535, 0
+}),
+0, 0,0, 0,0, 1,0, -1,
+0, 0,0, 0,0, 0,0, 0
+));
+
__m256i test_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packus_epi32
@@ -1133,6 +1211,20 @@ __m256i test_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_packus_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_packus_epi32(
+ (__m256i)(__v16hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask16)0xAAAA,
+(__m256i)(__v8si){
+40000, -50000, 32767, -32768, 32768, -32769, 22222, -22222
+}, (__m256i)(__v8si){
+0, 1, -1, 65536, 40000, -40000, 65535, 0
+}),
+ 1, 0, 3, 0, 5, 1, 7, -1,
+25, 0,27, 0, 29, 0, 31, 0
+));
__m128i test_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_packus_epi16
@@ -1154,6 +1246,20 @@ __m256i test_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_packus_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_maskz_packus_epi16(
+ (__mmask32)0xAAAAAAAA,
+(__m256i)(__v16hi){
+-1, 0, 1, 127, 128, 255, 256, -200,
+300, 42, -42, 500, 20000, -32768, 129, -129
+}, (__m256i)(__v16hi){
+0, 1, -1, 255, -129, 128, 20000, -32768,
+32767, -32767, 127, -128, 30000, -30000, 90, -90
+}),
+0, 0,0, 127,0, -1,0, 0,
+0, 1,0, -1,0, -128,0, 0,
+0, 42,0, -1,0, 0,0, 0,
+0, 0,0, 0,0, 0,0, 0
+));
__m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_packus_epi16
@@ -1161,6 +1267,26 @@ __m256i test_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_packus_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(_mm256_mask_packus_epi16(
+ (__m256i)(__v32qi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+(__m256i)(__v16hi){
+-1, 0, 1, 127, 128, 255, 256, -200,
+300, 42, -42, 500, 20000, -32768, 129, -129
+}, (__m256i)(__v16hi){
+0, 1, -1, 255, -129, 128, 20000, -32768,
+32767, -32767, 127, -128, 30000, -30000, 90, -90
+}),
+ 1, 0, 3, 127, 5, -1, 7, 0,
+ 9, 1, 11, -1, 13, -128, 15, 0,
+49, 42, 51, -1, 53, 0, 55, 0,
+57, 0, 59, 0, 61, 0, 63, 0
+));
__m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epi8
@@ -1180,12 +1306,63 @@ __m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m25
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_adds_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qi(
+ _mm256_mask_adds_epi8(
+ (__m256i)(__v32qs){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
++100, +50, -100, +20, +80, -50, +120, -20,
+-100, -50, +100, -20, -80, +50, -120, +20
+},
+ (__m256i)(__v32qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ +50, +80, -50, +110, +60, -30, +20, -10,
+ +50, +80, -50, +110, +60, -30, +20, -10
+ }
+ ),
+ 1, +2, 3, +6, 5, +10, 7, +14,
+ 9, +18, 11, +22, 13, +26, 15, +30,
+ 49, +127, 51, +127, 53, -80, 55, -30,
+ 57, +30, 59, +90, 61, +20, 63, +10
+));
+
__m256i test_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epi8
// CHECK: @llvm.sadd.sat.v32i8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_adds_epi8(__U,__A,__B);
}
+
+TEST_CONSTEXPR(match_v32qi(
+ _mm256_maskz_adds_epi8(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
++100, +50, -100, +20, +80, -50, +120, -20,
+-100, -50, +100, -20, -80, +50, -120, +20
+},
+ (__m256i)(__v32qs){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -8, +9, -10, +11, -12, +13, -14, +15,
+ +50, +80, -50, +110, +60, -30, +20, -10,
+ +50, +80, -50, +110, +60, -30, +20, -10
+ }
+ ),
+ 0, +2,0, +6, 0, +10,0, +14,
+ 0, +18,0, +22, 0, +26,0, +30,
+ 0, +127,0, +127, 0, -80,0, -30,
+ 0, +30,0, +90, 0, +20,0, +10
+));
+
__m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epi16
// CHECK: @llvm.sadd.sat.v8i16
@@ -1204,12 +1381,53 @@ __m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m2
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_adds_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_mask_adds_epi16(
+ (__m256i)(__v16hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ },
+ (__mmask16)0xAAAA,
+ (__m256i)(__v16hi){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -24, +25, -26, +27, +32000, -32000, +32000, +32000
+ },
+ (__m256i)(__v16hi){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -24, +25, -26, +27, +800, -800, -800, +800
+ }
+ ),
+ 1, +2, 3, +6, 5, +10, 7, +14,
+ 9, +50, 11, +54, 13, -32768, 15, +32767
+ )
+);
+
+
__m256i test_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epi16
// CHECK: @llvm.sadd.sat.v16i16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_adds_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_maskz_adds_epi16(
+ (__mmask16)0xAAAA,
+ (__m256i)(__v16hi){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -24, +25, -26, +27, +32000, -32000, +32000, +32000
+ },
+ (__m256i)(__v16hi){
+ 0, +1, -2, +3, -4, +5, -6, +7,
+ -24, +25, -26, +27, +800, -800, -800, +800
+ }
+ ),
+ 0, +2, 0, +6, 0, +10, 0, +14,
+ 0, +50, 0, +54, 0, -32768, 0, +32767
+ )
+);
+
__m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epu8
// CHECK-NOT: @llvm.x86.sse2.paddus.b
@@ -1231,6 +1449,31 @@ __m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m25
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_adds_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_mask_adds_epu8((__m256i)(__v32qu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qu){
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ +63, +63, +63, +63, +63, +63, +63, +63,
++192, +192, +192, +192, +192, +192, +192, +192,
++255, +255, +255, +255, +255, +255, +255, +255
+}, (__m256i)(__v32qu){
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255}),
+
+ 1, +63, 3, +127, 5, +191, 7, +255,
+ 9, +126, 11, +190, 13, +254, 15, +255,
+49, +255, 51, +255, 53, +255, 55, +255,
+57, +255, 59, +255, 61, +255, 63, +255
+));
+
+
__m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epu8
// CHECK-NOT: @llvm.x86.avx2.paddus.b
@@ -1238,6 +1481,25 @@ __m256i test_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_adds_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v32qu(_mm256_maskz_adds_epu8(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qu){
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ +63, +63, +63, +63, +63, +63, +63, +63,
++192, +192, +192, +192, +192, +192, +192, +192,
++255, +255, +255, +255, +255, +255, +255, +255
+}, (__m256i)(__v32qu){
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255,
+0, +63, +64, +127, +128, +191, +192, +255}),
+
+0, +63,0, +127,0, +191,0, +255,
+0, +126,0, +190,0, +254,0, +255,
+0, +255,0, +255,0, +255,0, +255,
+0, +255,0, +255,0, +255,0, +255
+));
+
__m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_adds_epu16
// CHECK-NOT: @llvm.x86.sse2.paddus.w
@@ -1259,6 +1521,26 @@ __m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m2
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_adds_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+match_v16hu(
+_mm256_mask_adds_epu16(
+(__m256i)(__v16hu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 25, 26, 27, 28, 29, 30, 31, 32
+},
+(__mmask16)0xAAAA,
+(__m256i)(__v16hu){
+0, 0, 0, 0, +16384, +16384, +16384, +16384,
++49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535
+},
+(__m256i)(__v16hu){
+ 0, +32767, +32768, +65535, 0, +16384, +32767, +32768,
++32767, +32768, +49152, +65535, 0, +32767, +32768, +65535
+}),
+
+ 1, +32767, 3, +65535, 5, +32768, 7, +49152,
+25, +65535, 27, +65535, 29, +65535, 31, +65535
+));
__m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_adds_epu16
// CHECK-NOT: @llvm.x86.avx2.paddus.w
@@ -1266,6 +1548,24 @@ __m256i test_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_adds_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+match_v16hu(
+_mm256_maskz_adds_epu16(
+(__mmask16)0xAAAA,
+(__m256i)(__v16hu){
+0, 0, 0, 0, +16384, +16384, +16384, +16384,
++49152, +49152, +49152, +49152, +65535, +65535, +65535, +65535
+},
+(__m256i)(__v16hu){
+ 0, +32767, +32768, +65535, 0, +16384, +32767, +32768,
++32767, +32768, +49152, +65535, 0, +32767, +32768, +65535
+}),
+
+0, +32767,0, +65535,0, +32768,0, +49152,
+0, +65535,0, +65535,0, +65535,0, +65535
+));
+
+
__m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_avg_epu8
// CHECK: @llvm.x86.sse2.pavg.b
@@ -1724,12 +2024,66 @@ __m256i test_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m25
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_subs_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_mask_subs_epi8(
+ (__m256i)(__v32qs){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 1, -100, 3, 4, 5, -6, 7, 100,
+ 9, -100, 11, 12, 13, -14, 15, 100,
+ 49, -100, 51, 52, 53, -54, 55, 100,
+ 57, -100, 59, 60, 61, -62, 63, 100
+ },
+ (__m256i)(__v32qs){
+ 1, 100, 3, 4, 5, 6, 7, -100,
+ 9, 100, 11, 12, 13, 14, 15, -100,
+ 49, 100, 51, 52, 53, 54, 55, -100,
+ 57, 100, 59, 60, 61, 62, 63, -100
+ }
+ ),
+ 1, -128, 3, 0, 5, -12, 7, 127,
+ 9, -128, 11, 0, 13, -28, 15, 127,
+ 49, -128, 51, 0, 53, -108, 55, 127,
+ 57, -128, 59, 0, 61, -124, 63, 127
+ )
+);
+
__m256i test_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epi8
// CHECK: @llvm.ssub.sat.v32i8
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_subs_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_maskz_subs_epi8(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 1, -100, 3, 4, 5, -6, 7, 100,
+ 9, -100, 11, 12, 13, -14, 15, 100,
+ 49, -100, 51, 52, 53, -54, 55, 100,
+ 57, -100, 59, 60, 61, -62, 63, 100
+ },
+ (__m256i)(__v32qs){
+ 1, 100, 3, 4, 5, 6, 7, -100,
+ 9, 100, 11, 12, 13, 14, 15, -100,
+ 49, 100, 51, 52, 53, 54, 55, -100,
+ 57, 100, 59, 60, 61, 62, 63, -100
+ }
+ ),
+ 0, -128, 0, 0, 0, -12, 0, 127,
+ 0, -128, 0, 0, 0, -28, 0, 127,
+ 0, -128, 0, 0, 0, -108, 0, 127,
+ 0, -128, 0, 0, 0, -124, 0, 127
+ )
+);
+
__m128i test_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epi16
// CHECK: @llvm.ssub.sat.v8i16
@@ -1742,12 +2096,52 @@ __m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_subs_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_maskz_subs_epi16(
+ (__mmask16)0xAAAA,
+ (__m256i)(__v16hi){
+ 1, -30000, 3, 30000, 5, -6, 7, 8,
+ 25, -30000, 27, 30000, 29, -30, 31, 32
+ },
+ (__m256i)(__v16hi){
+ 1, 30000, 3, -30000, 5, 6, 7, -8,
+ 25, 30000, 27, -30000, 29, 30, 31, -32
+ }
+ ),
+ 0, -32768, 0, 32767, 0, -12, 0, 16,
+ 0, -32768, 0, 32767, 0, -60, 0, 64
+ )
+);
+
__m256i test_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epi16
// CHECK: @llvm.ssub.sat.v16i16
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_subs_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_mask_subs_epi16(
+ (__m256i)(__v16hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask16)0xAAAA,
+ (__m256i)(__v16hi){
+ 1, -30000, 3, 30000, 5, -6, 7, 8,
+ 25, -30000, 27, 30000, 29, -30, 31, 32
+ },
+ (__m256i)(__v16hi){
+ 1, 30000, 3, -30000, 5, 6, 7, -8,
+ 25, 30000, 27, -30000, 29, 30, 31, -32
+ }
+ ),
+ 1, -32768, 3, 32767, 5, -12, 7, 16,
+ 25, -32768, 27, 32767, 29, -60, 31, 64
+ )
+);
+
__m256i test_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epi16
// CHECK: @llvm.ssub.sat.v16i16
@@ -1775,6 +2169,35 @@ __m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m25
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_subs_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32qu(
+ _mm256_mask_subs_epu8(
+ (__m256i)(__v32qu){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56,
+ 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qu){
+ 0, 250, 0, 128, 0, 20, 0, 255,
+ 0, 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255,
+ 0, 0, 0, 1, 0, 100, 0, 255
+ },
+ (__m256i)(__v32qu){
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255
+ }
+ ),
+ 1, 200, 3, 0, 5, 0, 7, 254,
+ 9, 0, 11, 1, 13, 1, 15, 0,
+ 49, 200,51, 0, 53, 0, 55, 254,
+ 57, 0,59, 1, 61, 1, 63, 0
+ )
+);
__m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epu8
// CHECK-NOT: @llvm.x86.avx2.psubus.b
@@ -1782,6 +2205,29 @@ __m256i test_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_subs_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v32qu(
+ _mm256_maskz_subs_epu8(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qu){
+ 0, 250, 0, 128, 0, 20, 0, 255,
+ 0, 0, 0, 1, 0, 100, 0, 255,
+ 0, 250, 0, 128, 0, 20, 0, 255,
+ 0, 0, 0, 1, 0, 100, 0, 255
+ },
+ (__m256i)(__v32qu){
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255,
+ 0, 50, 0, 128, 0, 30, 0, 1,
+ 0, 1, 0, 0, 0, 99, 0, 255
+ }
+ ),
+ 0, 200,0, 0,0, 0,0, 254,
+ 0, 0,0, 1,0, 1,0, 0,
+ 0, 200,0, 0,0, 0,0, 254,
+ 0, 0,0, 1,0, 1,0, 0
+ )
+);
__m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epu16
// CHECK-NOT: @llvm.x86.sse2.psubus.w
@@ -1803,6 +2249,28 @@ __m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m2
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_subs_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+ match_v16hu(
+ _mm256_mask_subs_epu16(
+ (__m256i)(__v16hu){
+ 101, 102, 103, 104, 105, 106, 107, 108,
+ 125, 126, 127, 128, 129, 130, 131, 132
+ },
+ (__mmask16)0xAAAAu,
+ (__m256i)(__v16hu){
+ 0, 65000, 0, 40000, 0, 100, 0, 65535,
+ 0, 0, 0, 1000, 0, 1, 0, 50000
+ },
+ (__m256i)(__v16hu){
+ 0, 5000, 0, 40000, 0, 200, 0, 1,
+ 0, 1, 0, 65535, 0, 0, 0, 25000
+ }
+ ),
+ 101, 60000, 103, 0, 105, 0, 107, 65534,
+ 125, 0, 127, 0, 129, 1, 131, 25000
+ )
+);
+
__m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_subs_epu16
// CHECK-NOT: @llvm.x86.avx2.psubus.w
@@ -1810,7 +2278,23 @@ __m256i test_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_subs_epu16(__U,__A,__B);
}
-
+TEST_CONSTEXPR(
+ match_v16hu(
+ _mm256_maskz_subs_epu16(
+ (__mmask16)0xAAAAu,
+ (__m256i)(__v16hu){
+ 0, 65000, 0, 40000, 0, 100, 10, 65535,
+ 0, 0, 0, 1000, 0, 1, 10000, 50000
+ },
+ (__m256i)(__v16hu){
+ 0, 5000, 0, 40000, 0, 200, 0, 1,
+ 0, 1, 0, 65535, 0, 0, 0, 25000
+ }
+ ),
+ 0, 60000, 0, 0, 0, 0, 0, 65534,
+ 0, 0, 0, 0, 0, 1, 0, 25000
+ )
+);
__m128i test_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) {
// CHECK-LABEL: test_mm_mask2_permutex2var_epi16
@@ -2146,6 +2630,27 @@ __m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, _
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_unpackhi_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_mask_unpackhi_epi8(
+ (__m256i)(__v32qs){
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ -104,-103,-102,-101,-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89
+ },
+ (__m256i)(__v32qs){
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64
+ }
+ ),
+ 1, -9, 3,-10, 5,-11, 7,-12, 9,-13, 11,-14, 13,-15, 15,-16,
+ 49,-57, 51,-58, 53,-59, 55,-60, 57,-61, 59,-62, 61,-63, 63,-64
+ )
+);
__m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpackhi_epi8
@@ -2153,6 +2658,23 @@ __m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_unpackhi_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_maskz_unpackhi_epi8(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ -104,-103,-102,-101,-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89
+ },
+ (__m256i)(__v32qs){
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ -49, -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64
+ }
+ ),
+ 0, -9, 0,-10,0,-11,0,-12,0,-13,0,-14,0,-15,0,-16,
+ 0,-57, 0,-58,0,-59,0,-60,0,-61,0,-62,0,-63,0,-64
+ )
+);
__m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpackhi_epi16
@@ -2174,6 +2696,27 @@ __m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A,
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_unpackhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_mask_unpackhi_epi16(
+ (__m256i)(__v16hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask16)0xAAAAu,
+ (__m256i)(__v16hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m256i)(__v16hi){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 1, 204, 3, 205, 5, 206, 7, 207,
+ 25, 234, 27, 235, 29, 236, 31, 237
+ )
+);
__m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpackhi_epi16
@@ -2181,6 +2724,23 @@ __m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_unpackhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_maskz_unpackhi_epi16(
+ (__mmask16)0xAAAAu,
+ (__m256i)(__v16hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m256i)(__v16hi){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 0, 204, 0, 205, 0, 206, 0, 207,
+ 0, 234, 0, 235, 0, 236, 0, 237
+ )
+);
__m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpacklo_epi8
@@ -2202,6 +2762,27 @@ __m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, _
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_unpacklo_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_mask_unpacklo_epi8(
+ (__m256i)(__v32qs){
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
+ 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65
+ },
+ (__m256i)(__v32qs){
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
+ }
+ ),
+ 1, -1, 3, -2, 5, -3, 7, -4, 9, -5, 11, -6, 13, -7, 15, -8,
+ 49, 60, 51, 61, 53, 62, 55, 63, 57, 64, 59, 65, 61, 66, 63, 67
+ )
+);
__m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpacklo_epi8
@@ -2209,6 +2790,23 @@ __m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_unpacklo_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_maskz_unpacklo_epi8(
+ (__mmask32)0xAAAAAAAA,
+ (__m256i)(__v32qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
+ -50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65
+ },
+ (__m256i)(__v32qs){
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16,
+ 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
+ }
+ ),
+ 0, -1,0, -2,0, -3,0, -4,0, -5,0, -6,0, -7,0, -8,
+ 0, 60,0, 61,0, 62,0, 63,0, 64,0, 65,0, 66,0, 67
+ )
+);
__m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpacklo_epi16
@@ -2230,6 +2828,27 @@ __m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A,
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_unpacklo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_mask_unpacklo_epi16(
+ (__m256i)(__v16hi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 25, 26, 27, 28, 29, 30, 31, 32
+ },
+ (__mmask16)0xAAAAu,
+ (__m256i)(__v16hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m256i)(__v16hi){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 1, 200, 3, 201, 5, 202, 7, 203,
+ 25, 230, 27, 231, 29, 232, 31, 233
+ )
+);
__m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_unpacklo_epi16
@@ -2237,6 +2856,23 @@ __m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_unpacklo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_maskz_unpacklo_epi16(
+ (__mmask16)0xAAAAu,
+ (__m256i)(__v16hi){
+ 100, 101, 102, 103, 104, 105, 106, 107,
+ 130, 131, 132, 133, 134, 135, 136, 137
+ },
+ (__m256i)(__v16hi){
+ 200, 201, 202, 203, 204, 205, 206, 207,
+ 230, 231, 232, 233, 234, 235, 236, 237
+ }
+ ),
+ 0, 200,0, 201,0, 202,0, 203,
+ 0, 230,0, 231,0, 232,0, 233
+ )
+);
__m128i test_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_cvtepi8_epi16
@@ -2258,6 +2894,11 @@ __m256i test_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_cvtepi8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_cvtepi8_epi16(_mm256_set1_epi16(-777), /*1001 1100 1010 0101=*/0x9ca5,
+(__m128i)(__v16qs){1, -2, 3, -4, 5, -6, 7, -8, 25, -26, 27, -28, 29, -30, 31, -32}),
+1, -777, 3, -777, -777, -6, -777, -8,
+-777, -777, 27, -28, 29, -777, -777, -32));
+
__m256i test_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_cvtepi8_epi16
@@ -2265,6 +2906,10 @@ __m256i test_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_cvtepi8_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_cvtepi8_epi16(/*1001 1100 1010 0101=*/0x9ca5,
+(__m128i)(__v16qs){1, -2, 3, -4, 5, -6, 7, -8, 25, -26, 27, -28, 29, -30, 31, -32}),
+1, 0, 3, 0, 0, -6, 0, -8,
+0, 0, 27, -28, 29, 0, 0, -32));
__m128i test_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_cvtepu8_epi16
@@ -2286,6 +2931,10 @@ __m256i test_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_cvtepu8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_mask_cvtepu8_epi16(_mm256_set1_epi16(-777), /*1001 1100 1010 0101=*/0x9ca5,
+(__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 25, 26, 27, 28, 29, 30, 31, 32}),
+1, -777, 3, -777, -777, 6, -777, 8, -777, -777, 27, 28, 29, -777, -777, 32));
+
__m256i test_mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_cvtepu8_epi16
@@ -2293,6 +2942,10 @@ __m256i test_mm256_maskz_cvtepu8_epi16(__mmask16 __U, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_cvtepu8_epi16(__U, __A);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_maskz_cvtepu8_epi16( /*1001 1100 1010 0101=*/0x9ca5,
+(__m128i)(__v16qu){1, 2, 3, 4, 5, 6, 7, 8, 25, 26, 27, 28, 29, 30, 31, 32}),
+1, 0, 3, 0, 0, 6, 0, 8, 0, 0, 27, 28, 29, 0, 0, 32));
+
__m256i test_mm256_sllv_epi16(__m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_sllv_epi16
@@ -3042,6 +3695,27 @@ __m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A)
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_broadcastb_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_mask_broadcastb_epi8(
+ (__m256i)(__v32qs){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63
+ },
+ (__mmask32)0xAAAAAAAA,
+ (__m128i)(__v16qs){
+ -120, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }
+ ),
+ 0, -120, 2, -120, 4, -120, 6, -120,
+ 8, -120, 10, -120, 12, -120, 14, -120,
+ 48, -120, 50, -120, 52, -120, 54, -120,
+ 56, -120, 58, -120, 60, -120, 62, -120
+ )
+);
__m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_broadcastb_epi8
@@ -3049,6 +3723,21 @@ __m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_broadcastb_epi8(__M, __A);
}
+TEST_CONSTEXPR(
+ match_v32qi(
+ _mm256_maskz_broadcastb_epi8(
+ (__mmask32)0xAAAAAAAA,
+ (__m128i)(__v16qs){
+ -120, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }
+ ),
+ 0, -120,0, -120,0, -120,0, -120,
+ 0, -120,0, -120,0, -120,0, -120,
+ 0, -120,0, -120,0, -120,0, -120,
+ 0, -120,0, -120,0, -120,0, -120
+ )
+);
__m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) {
// CHECK-LABEL: test_mm_mask_broadcastw_epi16
@@ -3070,6 +3759,22 @@ __m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_broadcastw_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(
+ match_v16hi(
+ _mm256_mask_broadcastw_epi16(
+ (__m256i)(__v16hi){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 24, 25, 26, 27, 28, 29, 30, 31
+ },
+ (__mmask16)0xAAAA,
+ (__m128i)(__v8hi){
+ -120, 1, 2, 3, 4, 5, 6, 7
+ }
+ ),
+ 0, -120, 2, -120, 4, -120, 6, -120,
+ 24, -120, 26, -120, 28, -120, 30, -120
+ )
+);
__m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_maskz_broadcastw_epi16
@@ -3077,6 +3782,20 @@ __m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_broadcastw_epi16(__M, __A);
}
+TEST_CONSTEXPR(
+  match_v16hi(
+    _mm256_maskz_broadcastw_epi16(
+      (__mmask16)0xAAAA, // odd lanes: broadcast word; even lanes: zero
+      (__m128i)(__v8hi){
+        -120, 1, 2, 3, 4, 5, 6, 7
+      }
+    ),
+    0, -120, 0, -120, 0, -120, 0, -120,
+    0, -120, 0, -120, 0, -120, 0, -120
+  )
+);
+
+
__m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
// CHECK-LABEL: test_mm_mask_set1_epi8
// CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
@@ -3157,6 +3876,24 @@ __m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_mask_set1_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR(
+  match_v32qi(
+    _mm256_mask_set1_epi8(
+      (__m256i)(__v32qi){
+        1, 2, 3, 4, 5, 6, 7, 8,
+        9, 10, 11, 12, 13, 14, 15, 16,
+        49, 50, 51, 52, 53, 54, 55, 56,
+        57, 58, 59, 60, 61, 62, 63, 64
+      },
+      (__mmask32)0xAAAAAAAA, // odd lanes: the scalar; even lanes: passthrough
+      (char)42
+    ),
+    1, 42, 3, 42, 5, 42, 7, 42,
+    9, 42, 11, 42, 13, 42, 15, 42,
+    49, 42, 51, 42, 53, 42, 55, 42,
+    57, 42, 59, 42, 61, 42, 63, 42
+  )
+);
__m256i test_mm256_maskz_set1_epi8( __mmask32 __M, char __A) {
// CHECK-LABEL: test_mm256_maskz_set1_epi8
@@ -3195,6 +3932,18 @@ __m256i test_mm256_maskz_set1_epi8( __mmask32 __M, char __A) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
return _mm256_maskz_set1_epi8( __M, __A);
}
+TEST_CONSTEXPR(
+  match_v32qi(
+    _mm256_maskz_set1_epi8(
+      (__mmask32)0xAAAAAAAA, // odd lanes: the scalar; even lanes: zero
+      (char)42
+    ),
+    0, 42, 0, 42, 0, 42, 0, 42,
+    0, 42, 0, 42, 0, 42, 0, 42,
+    0, 42, 0, 42, 0, 42, 0, 42,
+    0, 42, 0, 42, 0, 42, 0, 42
+  )
+);
__m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) {
@@ -3218,6 +3967,20 @@ __m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_set1_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR(
+  match_v16hi(
+    _mm256_mask_set1_epi16(
+      (__m256i)(__v16hi){
+        1, 2, 3, 4, 5, 6, 7, 8,
+        25, 26, 27, 28, 29, 30, 31, 32
+      },
+      (__mmask16)0xAAAA, // odd lanes: the scalar; even lanes: passthrough
+      42
+    ),
+    1, 42, 3, 42, 5, 42, 7, 42,
+    25, 42, 27, 42, 29, 42, 31, 42
+  )
+);
__m256i test_mm256_maskz_set1_epi16(__mmask16 __M, short __A) {
// CHECK-LABEL: test_mm256_maskz_set1_epi16
@@ -3240,6 +4003,16 @@ __m256i test_mm256_maskz_set1_epi16(__mmask16 __M, short __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_set1_epi16(__M, __A);
}
+TEST_CONSTEXPR(
+  match_v16hi(
+    _mm256_maskz_set1_epi16(
+      (__mmask16)0xAAAA, // odd lanes: the scalar; even lanes: zero
+      42
+    ),
+    0, 42, 0, 42, 0, 42, 0, 42,
+    0, 42, 0, 42, 0, 42, 0, 42
+  )
+);
__m128i test_mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A) {
// CHECK-LABEL: test_mm_mask_set1_epi16
>From 9c31c74bd3ba74eb47b2bb3675bfc11c31c39501 Mon Sep 17 00:00:00 2001
From: GrumpyPigSkin <oliver61 at live.co.uk>
Date: Fri, 10 Oct 2025 16:20:20 +0100
Subject: [PATCH 6/6] [Headers][X86] Finished adding constexpr tests
---
clang/test/CodeGen/X86/avx512bw-builtins.c | 32 +-
clang/test/CodeGen/X86/avx512vlbw-builtins.c | 727 ++++++++++++++++++-
2 files changed, 713 insertions(+), 46 deletions(-)
diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c
index 40b46fffb80f9..5f3d9cbaf7656 100644
--- a/clang/test/CodeGen/X86/avx512bw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512bw-builtins.c
@@ -2519,7 +2519,7 @@ TEST_CONSTEXPR(
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__mmask64)0xFAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
116, 117, 118, 119, 120, 121, 122, 123, -128, -127, -126, -125, -124, -123, -122, -121,
@@ -2536,7 +2536,7 @@ TEST_CONSTEXPR(
1, -9, 3,-10, 5,-11, 7,-12, 9,-13, 11,-14, 13,-15, 15,-16,
17,-25, 19,-26, 21,-27, 23,-28, 25,-29, 27,-30, 29,-31, 31,-32,
33,-41, 35,-42, 37,-43, 39,-44, 41,-45, 43,-46, 45,-47, 47,-48,
- 49,-57, 51,-58, 53,-59, 55,-60, 57,-61, 59,-62, 61,-63, 63,-64
+ 49,-57, 51,-58, 53,-59, 55,-60, 57,-61, 59,-62, -90,-63, -89,-64
)
);
@@ -2549,7 +2549,7 @@ __m512i test_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B)
TEST_CONSTEXPR(
match_v64qi(
_mm512_maskz_unpackhi_epi8(
- (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__mmask64)0xFAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
116, 117, 118, 119, 120, 121, 122, 123, -128, -127, -126, -125, -124, -123, -122, -121,
@@ -2566,7 +2566,7 @@ TEST_CONSTEXPR(
0, -9, 0,-10, 0,-11, 0,-12, 0,-13, 0,-14, 0,-15, 0,-16,
0,-25, 0,-26, 0,-27, 0,-28, 0,-29, 0,-30, 0,-31, 0,-32,
0,-41, 0,-42, 0,-43, 0,-44, 0,-45, 0,-46, 0,-47, 0,-48,
- 0,-57, 0,-58, 0,-59, 0,-60, 0,-61, 0,-62, 0,-63, 0,-64
+ 0,-57, 0,-58, 0,-59, 0,-60, 0,-61, 0,-62, -90,-63, -89,-64
)
);
@@ -2593,7 +2593,7 @@ TEST_CONSTEXPR(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xFAAAAAAAu,
(__m512i)(__v32hi){
100, 101, 102, 103, 104, 105, 106, 107,
110, 111, 112, 113, 114, 115, 116, 117,
@@ -2610,7 +2610,7 @@ TEST_CONSTEXPR(
1, 204, 3, 205, 5, 206, 7, 207,
9, 214, 11, 215, 13, 216, 15, 217,
17, 224, 19, 225, 21, 226, 23, 227,
- 25, 234, 27, 235, 29, 236, 31, 237
+ 25, 234, 27, 235, 136, 236, 137, 237
)
);
@@ -2623,7 +2623,7 @@ __m512i test_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
TEST_CONSTEXPR(
match_v32hi(
_mm512_maskz_unpackhi_epi16(
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xFAAAAAAAu,
(__m512i)(__v32hi){
100, 101, 102, 103, 104, 105, 106, 107,
110, 111, 112, 113, 114, 115, 116, 117,
@@ -2640,7 +2640,7 @@ TEST_CONSTEXPR(
0, 204, 0, 205, 0, 206, 0, 207,
0, 214, 0, 215, 0, 216, 0, 217,
0, 224, 0, 225, 0, 226, 0, 227,
- 0, 234, 0, 235, 0, 236, 0, 237
+ 0, 234, 0, 235, 136, 236, 137, 237
)
);
@@ -2666,7 +2666,7 @@ TEST_CONSTEXPR(
33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__mmask64)0xFAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
-10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25,
@@ -2683,7 +2683,7 @@ TEST_CONSTEXPR(
1, -1, 3, -2, 5, -3, 7, -4, 9, -5, 11, -6, 13, -7, 15, -8,
17, 20, 19, 21, 21, 22, 23, 23, 25, 24, 27, 25, 29, 26, 31, 27,
33, 40, 35, 41, 37, 42, 39, 43, 41, 44, 43, 45, 45, 46, 47, 47,
- 49, 60, 51, 61, 53, 62, 55, 63, 57, 64, 59, 65, 61, 66, 63, 67
+ 49, 60, 51, 61, 53, 62, 55, 63, 57, 64, 59, 65, -56, 66, -57, 67
)
);
@@ -2696,7 +2696,7 @@ __m512i test_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B)
TEST_CONSTEXPR(
match_v64qi(
_mm512_maskz_unpacklo_epi8(
- (__mmask64)0xAAAAAAAAAAAAAAAA,
+ (__mmask64)0xFAAAAAAAAAAAAAAA,
(__m512i)(__v64qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
-10, -11, -12, -13, -14, -15, -16, -17, -18, -19, -20, -21, -22, -23, -24, -25,
@@ -2713,7 +2713,7 @@ TEST_CONSTEXPR(
0, -1, 0, -2, 0, -3, 0, -4, 0, -5, 0, -6, 0, -7, 0, -8,
0, 20, 0, 21, 0, 22, 0, 23, 0, 24, 0, 25, 0, 26, 0, 27,
0, 40, 0, 41, 0, 42, 0, 43, 0, 44, 0, 45, 0, 46, 0, 47,
- 0, 60, 0, 61, 0, 62, 0, 63, 0, 64, 0, 65, 0, 66, 0, 67
+ 0, 60, 0, 61, 0, 62, 0, 63, 0, 64, 0, 65, -56, 66, -57, 67
)
);
@@ -2739,7 +2739,7 @@ TEST_CONSTEXPR(
17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xFAAAAAAAu,
(__m512i)(__v32hi){
100, 101, 102, 103, 104, 105, 106, 107,
110, 111, 112, 113, 114, 115, 116, 117,
@@ -2756,7 +2756,7 @@ TEST_CONSTEXPR(
1, 200, 3, 201, 5, 202, 7, 203,
9, 210, 11, 211, 13, 212, 15, 213,
17, 220, 19, 221, 21, 222, 23, 223,
- 25, 230, 27, 231, 29, 232, 31, 233
+ 25, 230, 27, 231, 132, 232, 133, 233
)
);
@@ -2770,7 +2770,7 @@ __m512i test_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B)
TEST_CONSTEXPR(
match_v32hi(
_mm512_maskz_unpacklo_epi16(
- (__mmask32)0xAAAAAAAAu,
+ (__mmask32)0xFAAAAAAAu,
(__m512i)(__v32hi){
100, 101, 102, 103, 104, 105, 106, 107,
110, 111, 112, 113, 114, 115, 116, 117,
@@ -2787,7 +2787,7 @@ TEST_CONSTEXPR(
0, 200, 0, 201,0, 202, 0, 203,
0, 210, 0, 211,0, 212, 0, 213,
0, 220, 0, 221,0, 222, 0, 223,
- 0, 230, 0, 231,0, 232, 0, 233
+ 0, 230, 0, 231,132, 232, 133, 233
)
);
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index 9ec1cd0ebeda6..c10f1b4cc2767 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -1063,12 +1063,45 @@ __m128i test_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_packs_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_maskz_packs_epi32(
+      (__mmask8)0xAA, // odd lanes: packed result; even lanes: zero
+      (__m128i)(__v4si){
+        40000, -50000, 65535, -65536
+      },
+      (__m128i)(__v4si){
+        0, 50000, 40000, -40000
+      }
+    ),
+    0, -32768, 0, -32768, 0, 32767, 0, -32768
+  )
+);
+
__m128i test_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packs_epi32
// CHECK: @llvm.x86.sse2.packssdw
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_packs_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_mask_packs_epi32(
+      (__m128i)(__v8hi){
+        1, 2, 3, 4, 29, 30, 31, 32
+      },
+      (__mmask8)0xAA, // odd lanes: packed result; even lanes: passthrough
+      (__m128i)(__v4si){
+        40000, -50000, 65535, -65536
+      },
+      (__m128i)(__v4si){
+        0, 50000, 40000, -40000
+      }
+    ),
+    1, -32768, 3, -32768, 29, 32767, 31, -32768
+  )
+);
+
__m256i test_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packs_epi32
// CHECK: @llvm.x86.avx2.packssdw
@@ -1096,7 +1129,7 @@ TEST_CONSTEXPR(match_v16hi(_mm256_mask_packs_epi32(
1, 2, 3, 4, 5, 6, 7, 8,
25, 26, 27, 28, 29, 30, 31, 32
},
-(__mmask32)0xAAAA,
+(__mmask16)0xAAAA,
(__m256i)(__v8si){
40000, -50000, 32767, -32768, 32768, -32769, 65535, -65536
}, (__m256i)(__v8si){
@@ -1111,12 +1144,47 @@ __m128i test_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_packs_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_maskz_packs_epi16(
+      (__mmask16)0xAAAA, // odd lanes: packed result; even lanes: zero
+      (__m128i)(__v8hi){
+        130, -200, 127, -128, 255, -255, 127, -128
+      },
+      (__m128i)(__v8hi){
+        0, 1, -1, 255, -128, 90, -90, -32768
+      }),
+    0, -128, 0, -128, 0, -128, 0, -128,
+    0, 1, 0, 127, 0, 90, 0, -128
+  )
+);
+
__m128i test_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packs_epi16
// CHECK: @llvm.x86.sse2.packsswb
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_packs_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_mask_packs_epi16(
+      (__m128i)(__v16qi){
+        1, 2, 3, 4, 5, 6, 7, 8,
+        57, 58, 59, 60, 61, 62, 63, 64
+      },
+      (__mmask16)0xAAAA, // odd lanes: packed result; even lanes: passthrough
+      (__m128i)(__v8hi){
+        130, -200, 127, -128, 255, -255, 127, -128
+      },
+      (__m128i)(__v8hi){
+        0, 1, -1, 255, -128, 90, -90, -32768
+      }),
+    1, -128, 3, -128, 5, -128, 7, -128,
+    57, 1, 59, 127, 61, 90, 63, -128
+  )
+);
+
+
__m256i test_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packs_epi16
// CHECK: @llvm.x86.avx2.packsswb
@@ -1179,6 +1247,23 @@ __m128i test_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_packus_epi32(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hu(
+    _mm_mask_packus_epi32(
+      (__m128i)(__v8hu){
+        1, 2, 3, 4, 5, 6, 7, 8
+      },
+      (__mmask8)0xAA, // odd lanes: packed result; even lanes: passthrough
+      (__m128i)(__v4si){
+        40000, -50000, 32767, -32768
+      },
+      (__m128i)(__v4si){
+        0, 1, -1, 65536
+      }
+    ),
+    1, 0, 3, 0, 5, 1, 7, 65535
+  )
+);
__m128i test_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_packus_epi32
@@ -1186,6 +1271,20 @@ __m128i test_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_packus_epi32(__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hu(
+    _mm_maskz_packus_epi32(
+      (__mmask8)0xAA, // odd lanes: packed result; even lanes: zero
+      (__m128i)(__v4si){
+        40000, -50000, 32767, -32768
+      },
+      (__m128i)(__v4si){
+        0, 1, -1, 65536
+      }
+    ),
+    0, 0, 0, 0, 0, 1, 0, 65535
+  )
+);
__m256i test_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packus_epi32
@@ -1232,6 +1331,21 @@ __m128i test_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_packus_epi16(__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qu(
+    _mm_maskz_packus_epi16(
+      (__mmask16)0xAAAA, // odd lanes: packed result; even lanes: zero
+      (__m128i)(__v8hi){
+        -1, 0, 1, 127, 128, 255, 256, -200
+      },
+      (__m128i)(__v8hi){
+        0, 1, -1, 255, -129, 128, 20000, -32768
+      }
+    ),
+    0, 0, 0, 127, 0, 255, 0, 0,
+    0, 1, 0, 255, 0, 128, 0, 0
+  )
+);
__m128i test_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_packus_epi16
@@ -1239,6 +1353,25 @@ __m128i test_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m12
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_packus_epi16(__W,__M,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qu(
+    _mm_mask_packus_epi16(
+      (__m128i)(__v16qu){
+        1, 2, 3, 4, 5, 6, 7, 8,
+        9, 10, 11, 12, 13, 14, 15, 16
+      },
+      (__mmask16)0xAAAA, // odd lanes: packed result; even lanes: passthrough
+      (__m128i)(__v8hi){
+        -1, 0, 1, 127, 128, 255, 256, -200
+      },
+      (__m128i)(__v8hi){
+        0, 1, -1, 255, -129, 128, 20000, -32768
+      }
+    ),
+    1, 0, 3, 127, 5, 255, 7, 0,
+    9, 1, 11, 255, 13, 128, 15, 0
+  )
+);
__m256i test_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_packus_epi16
@@ -1294,12 +1427,48 @@ __m128i test_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_adds_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(
+  _mm_mask_adds_epi8(
+    (__m128i)(__v16qs){
+      1, 2, 3, 4, 5, 6, 7, 8,
+      57, 58, 59, 60, 61, 62, 63, 64
+    },
+    (__mmask16)0xAAAA, // odd lanes: saturated sum; even lanes: passthrough
+    (__m128i)(__v16qs){
+      0, +1, -2, +3, -4, +5, -6, +7,
+      -100, -50, +100, -20, -80, +120, -120, -20
+    },
+    (__m128i)(__v16qs){
+      0, +1, -2, +3, -4, +5, -6, +7,
+      +50, +80, -50, +110, +60, 120, +20, -120
+    }
+  ),
+  1, +2, 3, +6, 5, +10, 7, +14,
+  57, +30, 59, +90, 61, +127, 63, -128
+));
+
__m128i test_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epi8
// CHECK: @llvm.sadd.sat.v16i8
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_adds_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qi(
+  _mm_maskz_adds_epi8(
+    (__mmask16)0xAAAA, // odd lanes: saturated sum; even lanes: zero
+    (__m128i)(__v16qs){
+      0, +1, -2, +3, -4, +5, -6, +7,
+      -100, -50, +100, -20, -80, +120, -120, -20
+    },
+    (__m128i)(__v16qs){
+      0, +1, -2, +3, -4, +5, -6, +7,
+      +50, +80, -50, +110, +60, 120, +20, -120
+    }
+  ),
+  0, +2, 0, +6, 0, +10, 0, +14,
+  0, +30, 0, +90, 0, +127, 0, -128
+));
+
__m256i test_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epi8
// CHECK: @llvm.sadd.sat.v32i8
@@ -1369,12 +1538,46 @@ __m128i test_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_adds_epi16(__W,__U,__A,__B);
}
+
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_mask_adds_epi16(
+      (__m128i)(__v8hi){
+        9, 10, 11, 12, 13, 14, 15, 16
+      },
+      (__mmask8)0xAA, // odd lanes: saturated sum; even lanes: passthrough
+      (__m128i)(__v8hi){
+        -24, +25, -26, +27, +32000, -32000, +32000, +32000
+      },
+      (__m128i)(__v8hi){
+        -24, +25, -26, +27, +800, -800, -800, +800
+      }
+    ),
+    9, +50, 11, +54, 13, -32768, 15, +32767
+  )
+);
+
__m128i test_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epi16
// CHECK: @llvm.sadd.sat.v8i16
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_adds_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_maskz_adds_epi16(
+      (__mmask8)0xAA, // odd lanes: saturated sum; even lanes: zero
+      (__m128i)(__v8hi){
+        -24, +25, -26, +27, +32000, -32000, +32000, +32000
+      },
+      (__m128i)(__v8hi){
+        -24, +25, -26, +27, +800, -800, -800, +800
+      }
+    ),
+    0, +50, 0, +54, 0, -32768, 0, +32767
+  )
+);
+
__m256i test_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epi16
// CHECK: @llvm.sadd.sat.v16i16
@@ -1435,6 +1638,21 @@ __m128i test_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_adds_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_mask_adds_epu8(
+  (__m128i)(__v16qu){
+    1, 2, 3, 4, 5, 6, 7, 8,
+    57, 58, 59, 60, 61, 62, 63, 64
+  },
+  (__mmask16)0xAAAA, // odd lanes: saturated sum; even lanes: passthrough
+  (__m128i)(__v16qu){
+    0, 0, 0, 0, 0, 0, 0, 0,
+    +255, +255, +255, +255, +255, +255, +255, +255
+  }, (__m128i)(__v16qu){
+    0, +63, +64, +127, +128, +191, +192, +255,
+    0, +63, +64, +127, +128, +191, +192, +255}),
+  1, +63, 3, +127, 5, +191, 7, +255,
+  57, +255, 59, +255, 61, +255, 63, +255
+));
__m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epu8
// CHECK-NOT: @llvm.x86.sse2.paddus.b
@@ -1442,6 +1660,17 @@ __m128i test_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_adds_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(match_v16qu(_mm_maskz_adds_epu8(
+  (__mmask16)0xAAAA, // odd lanes: saturated sum; even lanes: zero
+  (__m128i)(__v16qu){
+    0, 0, 0, 0, 0, 0, 0, 0,
+    +255, +255, +255, +255, +255, +255, +255, +255
+  }, (__m128i)(__v16qu){
+    0, +63, +64, +127, +128, +191, +192, +255,
+    0, +63, +64, +127, +128, +191, +192, +255}),
+  0, +63, 0, +127, 0, +191, 0, +255,
+  0, +255, 0, +255, 0, +255, 0, +255
+));
__m256i test_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epu8
// CHECK-NOT: @llvm.x86.avx2.paddus.b
@@ -1507,6 +1736,24 @@ __m128i test_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_adds_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hu(
+    _mm_mask_adds_epu16(
+      (__m128i)(__v8hu){
+        25, 26, 27, 28, 29, 30, 31, 32
+      },
+      (__mmask8)0xAA, // odd lanes: saturated sum; even lanes: passthrough
+      (__m128i)(__v8hu){
+        +16384, +16384, +16384, +16384,
+        +49152, +49152, +49152, +49152
+      },
+      (__m128i)(__v8hu){
+        0, +16384, +32767, +32768,
+        +32767, +32768, +49152, +65535
+      }),
+    25, +32768, 27, +49152,
+    29, +65535, 31, +65535
+));
__m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_adds_epu16
// CHECK-NOT: @llvm.x86.sse2.paddus.w
@@ -1514,6 +1761,21 @@ __m128i test_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_adds_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hu(
+    _mm_maskz_adds_epu16(
+      (__mmask8)0xAA, // odd lanes: saturated sum; even lanes: zero
+      (__m128i)(__v8hu){
+        +16384, +16384, +16384, +16384,
+        +49152, +49152, +49152, +49152
+      },
+      (__m128i)(__v8hu){
+        0, +16384, +32767, +32768,
+        +32767, +32768, +49152, +65535
+      }),
+    0, +32768, 0, +49152,
+    0, +65535, 0, +65535
+));
__m256i test_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_adds_epu16
// CHECK-NOT: @llvm.x86.avx2.paddus.w
@@ -2012,12 +2274,50 @@ __m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_subs_epi8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_mask_subs_epi8(
+      (__m128i)(__v16qs){
+        1, 2, 3, 4, 5, 6, 7, 8,
+        57, 58, 59, 60, 61, 62, 63, 64
+      },
+      (__mmask16)0xAAAA, // odd lanes: saturated difference; even lanes: passthrough
+      (__m128i)(__v16qs){
+        1, -100, 3, 4, 5, -6, 7, 100,
+        57, -100, 59, 60, 61, -62, 63, 100
+      },
+      (__m128i)(__v16qs){
+        1, 100, 3, 4, 5, 6, 7, -100,
+        57, 100, 59, 60, 61, 62, 63, -100
+      }
+    ),
+    1, -128, 3, 0, 5, -12, 7, 127,
+    57, -128, 59, 0, 61, -124, 63, 127
+  )
+);
__m128i test_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epi8
// CHECK: @llvm.ssub.sat.v16i8
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_subs_epi8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_maskz_subs_epi8(
+      (__mmask16)0xAAAA, // odd lanes: saturated difference; even lanes: zero
+      (__m128i)(__v16qs){
+        1, -100, 3, 4, 5, -6, 7, 100,
+        57, -100, 59, 60, 61, -62, 63, 100
+      },
+      (__m128i)(__v16qs){
+        1, 100, 3, 4, 5, 6, 7, -100,
+        57, 100, 59, 60, 61, 62, 63, -100
+      }
+    ),
+    0, -128, 0, 0, 0, -12, 0, 127,
+    0, -128, 0, 0, 0, -124, 0, 127
+  )
+);
__m256i test_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epi8
// CHECK: @llvm.ssub.sat.v32i8
@@ -2090,6 +2390,24 @@ __m128i test_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_subs_epi16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_mask_subs_epi16(
+      (__m128i)(__v8hi){
+        1, 2, 3, 4, 29, 30, 31, 32
+      },
+      (__mmask8)0xAA, // odd lanes: saturated difference; even lanes: passthrough
+      (__m128i)(__v8hi){
+        1, -30000, 3, 30000, 29, -30, 31, 32
+      },
+      (__m128i)(__v8hi){
+        1, 30000, 3, -30000, 29, 30, 31, -32
+      }
+    ),
+    1, -32768, 3, 32767, 29, -60, 31, 64
+  )
+);
+
__m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epi16
// CHECK: @llvm.ssub.sat.v8i16
@@ -2097,23 +2415,19 @@ __m128i test_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
return _mm_maskz_subs_epi16(__U,__A,__B);
}
TEST_CONSTEXPR(
- match_v16hi(
- _mm256_maskz_subs_epi16(
- (__mmask16)0xAAAA,
- (__m256i)(__v16hi){
- 1, -30000, 3, 30000, 5, -6, 7, 8,
- 25, -30000, 27, 30000, 29, -30, 31, 32
+ match_v8hi(
+ _mm_maskz_subs_epi16(
+ (__mmask8)0xAA,
+ (__m128i)(__v8hi){
+ 1, -30000, 3, 30000, 29, -30, 31, 32
},
- (__m256i)(__v16hi){
- 1, 30000, 3, -30000, 5, 6, 7, -8,
- 25, 30000, 27, -30000, 29, 30, 31, -32
+ (__m128i)(__v8hi){
+ 1, 30000, 3, -30000, 29, 30, 31, -32
}
),
- 0, -32768, 0, 32767, 0, -12, 0, 16,
- 0, -32768, 0, 32767, 0, -60, 0, 64
+ 0, -32768, 0, 32767, 0, -60, 0, 64
)
);
-
__m256i test_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epi16
// CHECK: @llvm.ssub.sat.v16i16
@@ -2148,6 +2462,24 @@ __m256i test_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_subs_epi16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16hi(
+    _mm256_maskz_subs_epi16(
+      (__mmask16)0xAAAA, // odd lanes: saturated difference; even lanes: zero
+      (__m256i)(__v16hi){
+        1, -30000, 3, 30000, 5, -6, 7, 8,
+        25, -30000, 27, 30000, 29, -30, 31, 32
+      },
+      (__m256i)(__v16hi){
+        1, 30000, 3, -30000, 5, 6, 7, -8,
+        25, 30000, 27, -30000, 29, 30, 31, -32
+      }
+    ),
+    0, -32768, 0, 32767, 0, -12, 0, 16,
+    0, -32768, 0, 32767, 0, -60, 0, 64
+  )
+);
+
__m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_subs_epu8
// CHECK-NOT: @llvm.x86.sse2.psubus.b
@@ -2155,6 +2487,27 @@ __m128i test_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_subs_epu8(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qu(
+    _mm_mask_subs_epu8(
+      (__m128i)(__v16qu){
+        1, 2, 3, 4, 5, 6, 7, 8,
+        57, 58, 59, 60, 61, 62, 63, 64
+      },
+      (__mmask16)0xAAAA, // odd lanes: saturated difference; even lanes: passthrough
+      (__m128i)(__v16qu){
+        0, 250, 0, 128, 0, 20, 0, 255,
+        0, 0, 0, 1, 0, 100, 0, 255
+      },
+      (__m128i)(__v16qu){
+        0, 50, 0, 128, 0, 30, 0, 1,
+        0, 1, 0, 0, 0, 99, 0, 255
+      }
+    ),
+    1, 200, 3, 0, 5, 0, 7, 254,
+    57, 0, 59, 1, 61, 1, 63, 0
+  )
+);
__m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epu8
// CHECK-NOT: @llvm.x86.sse2.psubus.b
@@ -2162,6 +2515,24 @@ __m128i test_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_subs_epu8(__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v16qu(
+    _mm_maskz_subs_epu8(
+      (__mmask16)0xAAAA, // odd lanes: saturated difference; even lanes: zero
+      (__m128i)(__v16qu){
+        0, 250, 0, 128, 0, 20, 0, 255,
+        0, 0, 0, 1, 0, 100, 0, 255
+      },
+      (__m128i)(__v16qu){
+        0, 50, 0, 128, 0, 30, 0, 1,
+        0, 1, 0, 0, 0, 99, 0, 255
+      }
+    ),
+    0, 200, 0, 0, 0, 0, 0, 254,
+    0, 0, 0, 1, 0, 1, 0, 0
+  )
+);
+
__m256i test_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epu8
// CHECK-NOT: @llvm.x86.avx2.psubus.b
@@ -2235,6 +2606,23 @@ __m128i test_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_subs_epu16(__W,__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hu(
+    _mm_mask_subs_epu16(
+      (__m128i)(__v8hu){
+        101, 102, 103, 104, 129, 130, 131, 132
+      },
+      (__mmask8)0xAAu, // odd lanes: saturated difference; even lanes: passthrough
+      (__m128i)(__v8hu){
+        0, 65000, 0, 40000, 0, 1, 0, 50000
+      },
+      (__m128i)(__v8hu){
+        0, 5000, 0, 60000, 0, 0, 0, 25000
+      }
+    ),
+    101, 60000, 103, 0, 129, 1, 131, 25000
+  )
+);
__m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_subs_epu16
// CHECK-NOT: @llvm.x86.sse2.psubus.w
@@ -2242,6 +2630,20 @@ __m128i test_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_subs_epu16(__U,__A,__B);
}
+TEST_CONSTEXPR(
+  match_v8hu(
+    _mm_maskz_subs_epu16(
+      (__mmask8)0xAAu, // odd lanes: saturated difference; even lanes: zero
+      (__m128i)(__v8hu){
+        0, 65000, 0, 40000, 0, 1, 0, 50000
+      },
+      (__m128i)(__v8hu){
+        0, 5000, 0, 60000, 0, 0, 0, 25000
+      }
+    ),
+    0, 60000, 0, 0, 0, 1, 0, 25000
+  )
+);
__m256i test_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_subs_epu16
// CHECK-NOT: @llvm.x86.avx2.psubus.w
@@ -2616,6 +3018,23 @@ __m128i test_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m1
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_unpackhi_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_mask_unpackhi_epi8(
+      (__m128i)(__v16qs){
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+      },
+      (__mmask16)0xFAAA, // odd lanes and lanes 12-15: result; rest: passthrough
+      (__m128i)(__v16qs){
+        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115
+      },
+      (__m128i)(__v16qs){
+        -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+      }
+    ),
+    1, -9, 3, -10, 5, -11, 7, -12, 9, -13, 11, -14, 114, -15, 115, -16
+  )
+);
__m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpackhi_epi8
@@ -2623,6 +3042,20 @@ __m128i test_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_unpackhi_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_maskz_unpackhi_epi8(
+      (__mmask16)0xFAAA, // odd lanes and lanes 12-15: result; rest: zero
+      (__m128i)(__v16qs){
+        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115
+      },
+      (__m128i)(__v16qs){
+        -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+      }
+    ),
+    0, -9, 0, -10, 0, -11, 0, -12, 0, -13, 0, -14, 114, -15, 115, -16
+  )
+);
__m256i test_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpackhi_epi8
@@ -2637,7 +3070,7 @@ TEST_CONSTEXPR(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask32)0xAAAAAAAA,
+ (__mmask32)0xFAAAAAAA,
(__m256i)(__v32qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
-104,-103,-102,-101,-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89
@@ -2648,7 +3081,7 @@ TEST_CONSTEXPR(
}
),
1, -9, 3,-10, 5,-11, 7,-12, 9,-13, 11,-14, 13,-15, 15,-16,
- 49,-57, 51,-58, 53,-59, 55,-60, 57,-61, 59,-62, 61,-63, 63,-64
+ 49,-57, 51,-58, 53,-59, 55,-60, 57,-61, 59,-62, -90,-63, -89,-64
)
);
@@ -2661,7 +3094,7 @@ __m256i test_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B)
TEST_CONSTEXPR(
match_v32qi(
_mm256_maskz_unpackhi_epi8(
- (__mmask32)0xAAAAAAAA,
+ (__mmask32)0xFAAAAAAA,
(__m256i)(__v32qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
-104,-103,-102,-101,-100, -99, -98, -97, -96, -95, -94, -93, -92, -91, -90, -89
@@ -2672,7 +3105,7 @@ TEST_CONSTEXPR(
}
),
0, -9, 0,-10,0,-11,0,-12,0,-13,0,-14,0,-15,0,-16,
- 0,-57, 0,-58,0,-59,0,-60,0,-61,0,-62,0,-63,0,-64
+ 0,-57, 0,-58,0,-59,0,-60,0,-61,0,-62,-90,-63,-89,-64
)
);
@@ -2682,6 +3115,23 @@ __m128i test_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m1
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_unpackhi_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_mask_unpackhi_epi16(
+      (__m128i)(__v8hi){
+        1, 2, 3, 4, 5, 6, 7, 8
+      },
+      (__mmask8)0xFA, // lanes 1, 3 and 4-7: result; lanes 0 and 2: passthrough
+      (__m128i)(__v8hi){
+        100, 101, 102, 103, 104, 105, 106, 107
+      },
+      (__m128i)(__v8hi){
+        200, 201, 202, 203, 204, 205, 206, 207
+      }
+    ),
+    1, 204, 3, 205, 106, 206, 107, 207
+  )
+);
__m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpackhi_epi16
@@ -2689,6 +3139,20 @@ __m128i test_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_unpackhi_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(
+  match_v8hi(
+    _mm_maskz_unpackhi_epi16(
+      (__mmask8)0xFA, // lanes 1, 3 and 4-7: result; lanes 0 and 2: zero
+      (__m128i)(__v8hi){
+        100, 101, 102, 103, 104, 105, 106, 107
+      },
+      (__m128i)(__v8hi){
+        200, 201, 202, 203, 204, 205, 206, 207
+      }
+    ),
+    0, 204, 0, 205, 106, 206, 107, 207
+  )
+);
__m256i test_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpackhi_epi16
@@ -2703,7 +3167,7 @@ TEST_CONSTEXPR(
1, 2, 3, 4, 5, 6, 7, 8,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask16)0xAAAAu,
+ (__mmask16)0xFAAAu,
(__m256i)(__v16hi){
100, 101, 102, 103, 104, 105, 106, 107,
130, 131, 132, 133, 134, 135, 136, 137
@@ -2714,7 +3178,7 @@ TEST_CONSTEXPR(
}
),
1, 204, 3, 205, 5, 206, 7, 207,
- 25, 234, 27, 235, 29, 236, 31, 237
+ 25, 234, 27, 235, 136, 236, 137, 237
)
);
@@ -2727,7 +3191,7 @@ __m256i test_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B)
TEST_CONSTEXPR(
match_v16hi(
_mm256_maskz_unpackhi_epi16(
- (__mmask16)0xAAAAu,
+ (__mmask16)0xFAAAu,
(__m256i)(__v16hi){
100, 101, 102, 103, 104, 105, 106, 107,
130, 131, 132, 133, 134, 135, 136, 137
@@ -2738,7 +3202,7 @@ TEST_CONSTEXPR(
}
),
0, 204, 0, 205, 0, 206, 0, 207,
- 0, 234, 0, 235, 0, 236, 0, 237
+ 0, 234, 0, 235, 136, 236, 137, 237
)
);
@@ -2748,6 +3212,23 @@ __m128i test_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m1
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_unpacklo_epi8(__W, __U, __A, __B);
}
+TEST_CONSTEXPR(
+  match_v16qi(
+    _mm_mask_unpacklo_epi8(
+      (__m128i)(__v16qs){
+        1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
+      },
+      (__mmask16)0xFAAA, // odd lanes and lanes 12-15: result; rest: passthrough
+      (__m128i)(__v16qs){
+        100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115
+      },
+      (__m128i)(__v16qs){
+        -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+      }
+    ),
+    1, -1, 3, -2, 5, -3, 7, -4, 9, -5, 11, -6, 106, -7, 107, -8
+  )
+);
__m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpacklo_epi8
@@ -2755,6 +3236,20 @@ __m128i test_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_unpacklo_epi8(__U, __A, __B);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_unpacklo_epi8
+ match_v16qi(
+ _mm_maskz_unpacklo_epi8(
+ (__mmask16)0xFAAA, // odd bits 1-11 and bits 12-15 set
+ (__m128i)(__v16qs){
+ 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115
+ },
+ (__m128i)(__v16qs){
+ -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16
+ }
+ ),
+ 0, -1, 0, -2, 0, -3, 0, -4, 0, -5, 0, -6, 106, -7, 107, -8 // even lanes 0-10 are expected to be zero
+ )
+);
__m256i test_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpacklo_epi8
@@ -2769,7 +3264,7 @@ TEST_CONSTEXPR(
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
},
- (__mmask32)0xAAAAAAAAA,
+ (__mmask32)0xFAAAAAAA,
(__m256i)(__v32qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
-50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65
@@ -2780,7 +3275,7 @@ TEST_CONSTEXPR(
}
),
1, -1, 3, -2, 5, -3, 7, -4, 9, -5, 11, -6, 13, -7, 15, -8,
- 49, 60, 51, 61, 53, 62, 55, 63, 57, 64, 59, 65, 61, 66, 63, 67
+ 49, 60, 51, 61, 53, 62, 55, 63, 57, 64, 59, 65, -56, 66, -57, 67
)
);
@@ -2793,7 +3288,7 @@ __m256i test_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B)
TEST_CONSTEXPR(
match_v32qi(
_mm256_maskz_unpacklo_epi8(
- (__mmask32)0xAAAAAAAAA,
+ (__mmask32)0xFAAAAAAA,
(__m256i)(__v32qs){
100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
-50, -51, -52, -53, -54, -55, -56, -57, -58, -59, -60, -61, -62, -63, -64, -65
@@ -2803,8 +3298,8 @@ TEST_CONSTEXPR(
60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75
}
),
- 0, -1,0, -2,0, -3,0, -4,0, -5,0, -6,0, -7,0, -8,
- 0, 60,0, 61,0, 62,0, 63,0, 64,0, 65,0, 66,0, 67
+ 0, -1, 0, -2, 0, -3, 0, -4, 0, -5, 0, -6, 0, -7, 0, -8,
+ 0, 60, 0, 61, 0, 62, 0, 63, 0, 64, 0, 65, -56, 66, -57, 67
)
);
@@ -2814,6 +3309,23 @@ __m128i test_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m1
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_unpacklo_epi16(__W, __U, __A, __B);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_unpacklo_epi16
+ match_v8hi(
+ _mm_mask_unpacklo_epi16(
+ (__m128i)(__v8hi){
+ 1, 2, 3, 4, 5, 6, 7, 8
+ },
+ (__mmask8)0xFAu, // bits 1, 3 and 4-7 set; bits 0 and 2 clear
+ (__m128i)(__v8hi){
+ 100, 101, 102, 103, 104, 105, 106, 107
+ },
+ (__m128i)(__v8hi){
+ 200, 201, 202, 203, 204, 205, 206, 207
+ }
+ ),
+ 1, 200, 3, 201, 102, 202, 103, 203 // lanes 0 and 2 keep the first operand's values (1 and 3)
+ )
+);
__m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_unpacklo_epi16
@@ -2821,6 +3333,20 @@ __m128i test_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_unpacklo_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_unpacklo_epi16
+ match_v8hi(
+ _mm_maskz_unpacklo_epi16(
+ (__mmask8)0xFAu, // bits 1, 3 and 4-7 set; bits 0 and 2 clear
+ (__m128i)(__v8hi){
+ 100, 101, 102, 103, 104, 105, 106, 107
+ },
+ (__m128i)(__v8hi){
+ 200, 201, 202, 203, 204, 205, 206, 207
+ }
+ ),
+ 0, 200, 0, 201, 102, 202, 103, 203 // elements 0-3 of both inputs interleaved; lanes 0 and 2 expected to be zero
+ )
+);
__m256i test_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_unpacklo_epi16
@@ -2835,7 +3361,7 @@ TEST_CONSTEXPR(
1, 2, 3, 4, 5, 6, 7, 8,
25, 26, 27, 28, 29, 30, 31, 32
},
- (__mmask16)0xAAAAu,
+ (__mmask16)0xFAAAu,
(__m256i)(__v16hi){
100, 101, 102, 103, 104, 105, 106, 107,
130, 131, 132, 133, 134, 135, 136, 137
@@ -2846,7 +3372,7 @@ TEST_CONSTEXPR(
}
),
1, 200, 3, 201, 5, 202, 7, 203,
- 25, 230, 27, 231, 29, 232, 31, 233
+ 25, 230, 27, 231, 132, 232, 133, 233
)
);
@@ -2859,7 +3385,7 @@ __m256i test_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B)
TEST_CONSTEXPR(
match_v16hi(
_mm256_maskz_unpacklo_epi16(
- (__mmask16)0xAAAAu,
+ (__mmask16)0xFAAAu,
(__m256i)(__v16hi){
100, 101, 102, 103, 104, 105, 106, 107,
130, 131, 132, 133, 134, 135, 136, 137
@@ -2870,7 +3396,7 @@ TEST_CONSTEXPR(
}
),
0, 200,0, 201,0, 202,0, 203,
- 0, 230,0, 231,0, 232,0, 233
+ 0, 230,0, 231,132, 232,133, 233
)
);
@@ -2880,6 +3406,16 @@ __m128i test_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_cvtepi8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_cvtepi8_epi16
+ match_v8hi(
+ _mm_mask_cvtepi8_epi16(
+ _mm_set1_epi16(-777), // pass-through operand; -777 marks unselected lanes in the expected result
+ (__mmask8)0xA5, // bits 0, 2, 5 and 7 set
+ (__m128i)(__v16qs){1, -2, 3, -4, 5, -6, 7, -8, 9, 10, 11, 12, 13, 14, 15, 16} // only the first 8 bytes appear in the expected result
+ ),
+ 1, -777, 3, -777, -777, -6, -777, -8 // negative bytes are expected to keep their sign as 16-bit values
+ )
+);
__m128i test_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_cvtepi8_epi16
@@ -2887,6 +3423,15 @@ __m128i test_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_cvtepi8_epi16(__U, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_cvtepi8_epi16
+ match_v8hi(
+ _mm_maskz_cvtepi8_epi16(
+ (__mmask8)0xA5, // bits 0, 2, 5 and 7 set
+ (__m128i)(__v16qs){1, -2, 3, -4, 5, -6, 7, -8, 9, 10, 11, 12, 13, 14, 15, 16} // only the first 8 bytes appear in the expected result
+ ),
+ 1, 0, 3, 0, 0, -6, 0, -8 // unselected lanes 1, 3, 4 and 6 are expected to be zero
+ )
+);
__m256i test_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_cvtepi8_epi16
@@ -2917,6 +3462,16 @@ __m128i test_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_cvtepu8_epi16(__W, __U, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_cvtepu8_epi16
+ match_v8hi(
+ _mm_mask_cvtepu8_epi16(
+ _mm_set1_epi16(-777), // pass-through operand; -777 marks unselected lanes in the expected result
+ (__mmask8)0xA5, // bits 0, 2, 5 and 7 set
+ (__m128i)(__v16qu){25, 26, 27, 28, 29, 30, 31, 32, 0,0,0,0,0,0,0,0} // only the first 8 bytes appear in the expected result
+ ),
+ 25, -777, 27, -777, -777, 30, -777, 32 // selected lanes 0, 2, 5 and 7 hold the widened input bytes
+ )
+);
__m128i test_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_cvtepu8_epi16
@@ -2924,6 +3479,15 @@ __m128i test_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_cvtepu8_epi16(__U, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_cvtepu8_epi16
+ match_v8hi(
+ _mm_maskz_cvtepu8_epi16(
+ (__mmask8)0xA5, // bits 0, 2, 5 and 7 set
+ (__m128i)(__v16qu){25, 26, 27, 28, 29, 30, 31, 32, 0,0,0,0,0,0,0,0} // only the first 8 bytes appear in the expected result
+ ),
+ 25, 0, 27, 0, 0, 30, 0, 32 // unselected lanes 1, 3, 4 and 6 are expected to be zero
+ )
+);
__m256i test_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_cvtepu8_epi16
@@ -3020,6 +3584,8 @@ __m256i test_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_sll_epi16(__U, __A, __B);
}
+TEST_CONSTEXPR(match_v8hi(_mm_maskz_slli_epi16((__mmask8)0xAA, (__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 1), 0, 2, 0, 6, 0, 10, 0, 14)); // in-range shift: odd (selected) lanes hold a << 1; an ignored zero-mask would leave 4, 8, 12 in lanes 2, 4, 6 and fail the check
+
__m128i test_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK-LABEL: test_mm_mask_slli_epi16
@@ -3027,6 +3593,7 @@ __m128i test_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_slli_epi16(__W, __U, __A, 5);
}
+TEST_CONSTEXPR(match_v8hi(_mm_mask_slli_epi16((__m128i)(__v8hi){100, 101, 102, 103, 104, 105, 106, 107}, (__mmask8)0xAA, (__m128i)(__v8hi){0, 1, 2, 3, 4, 5, 6, 7}, 20), 100, 0, 102, 0, 104, 0, 106, 0)); // 0xAA selects odd lanes, expected to be 0 after the shift by 20; even lanes keep the first operand's values
__m128i test_mm_mask_slli_epi16_2(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) {
// CHECK-LABEL: test_mm_mask_slli_epi16_2
@@ -3681,6 +4248,23 @@ __m128i test_mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_broadcastb_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_broadcastb_epi8
+ match_v16qi(
+ _mm_mask_broadcastb_epi8(
+ (__m128i)(__v16qs){
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 56, 57, 58, 59, 60, 61, 62, 63
+ },
+ (__mmask16)0xAAAA, // odd bits set: odd lanes are selected
+ (__m128i)(__v16qs){
+ -120, 1, 2, 3, 4, 5, 6, 7, // element 0 (-120) is the value expected in every selected lane
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }
+ ),
+ 0, -120, 2, -120, 4, -120, 6, -120, // even lanes keep the first operand's values
+ 56, -120, 58, -120, 60, -120, 62, -120
+ )
+);
__m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_broadcastb_epi8
@@ -3688,6 +4272,19 @@ __m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_broadcastb_epi8(__M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_broadcastb_epi8
+ match_v16qi(
+ _mm_maskz_broadcastb_epi8(
+ (__mmask16)0xAAAA, // odd bits set: odd lanes are selected
+ (__m128i)(__v16qs){
+ -120, 1, 2, 3, 4, 5, 6, 7, // element 0 (-120) is the value expected in every selected lane
+ 8, 9, 10, 11, 12, 13, 14, 15
+ }
+ ),
+ 0, -120, 0, -120, 0, -120, 0, -120, // even lanes are expected to be zero
+ 0, -120, 0, -120, 0, -120, 0, -120
+ )
+);
__m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_broadcastb_epi8
@@ -3745,6 +4342,20 @@ __m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_broadcastw_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_broadcastw_epi16
+ match_v8hi(
+ _mm_mask_broadcastw_epi16(
+ (__m128i)(__v8hi){
+ 0, 1, 2, 3, 4, 5, 6, 7
+ },
+ (__mmask8)0xAA, // odd bits set: odd lanes are selected
+ (__m128i)(__v8hi){
+ -120, 1, 2, 3, 4, 5, 6, 7 // element 0 (-120) is the value expected in every selected lane
+ }
+ ),
+ 0, -120, 2, -120, 4, -120, 6, -120 // even lanes keep the first operand's values
+ )
+);
__m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) {
// CHECK-LABEL: test_mm_maskz_broadcastw_epi16
@@ -3752,6 +4363,17 @@ __m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_broadcastw_epi16(__M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_broadcastw_epi16
+ match_v8hi(
+ _mm_maskz_broadcastw_epi16(
+ (__mmask8)0xAA, // odd bits set: odd lanes are selected
+ (__m128i)(__v8hi){
+ -120, 1, 2, 3, 4, 5, 6, 7 // element 0 (-120) is the value expected in every selected lane
+ }
+ ),
+ 0, -120, 0, -120, 0, -120, 0, -120 // even lanes are expected to be zero
+ )
+);
__m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A) {
// CHECK-LABEL: test_mm256_mask_broadcastw_epi16
@@ -3817,6 +4439,20 @@ __m128i test_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A){
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_mask_set1_epi8(__O, __M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_set1_epi8
+ match_v16qi(
+ _mm_mask_set1_epi8(
+ (__m128i)(__v16qi){
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16
+ },
+ (__mmask16)0xAAAA, // odd bits set: odd lanes are selected
+ (char)42 // scalar expected in every selected lane
+ ),
+ 1, 42, 3, 42, 5, 42, 7, 42, // even lanes keep the first operand's values
+ 9, 42, 11, 42, 13, 42, 15, 42
+ )
+);
__m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
// CHECK-LABEL: test_mm_maskz_set1_epi8
// CHECK: insertelement <16 x i8> poison, i8 %{{.*}}, i32 0
@@ -3838,6 +4474,16 @@ __m128i test_mm_maskz_set1_epi8 ( __mmask16 __M, char __A){
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
return _mm_maskz_set1_epi8( __M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_set1_epi8
+ match_v16qi(
+ _mm_maskz_set1_epi8(
+ (__mmask16)0xAAAA, // odd bits set: odd lanes are selected
+ (char)42 // scalar expected in every selected lane
+ ),
+ 0, 42, 0, 42, 0, 42, 0, 42, // even lanes are expected to be zero
+ 0, 42, 0, 42, 0, 42, 0, 42
+ )
+);
__m256i test_mm256_mask_set1_epi8(__m256i __O, __mmask32 __M, char __A) {
// CHECK-LABEL: test_mm256_mask_set1_epi8
@@ -4027,6 +4673,18 @@ __m128i test_mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_set1_epi16(__O, __M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_mask_set1_epi16
+ match_v8hi(
+ _mm_mask_set1_epi16(
+ (__m128i)(__v8hi){
+ 1, 2, 3, 4, 5, 6, 7, 8
+ },
+ (__mmask8)0xAA, // odd bits set: odd lanes are selected
+ 42 // scalar expected in every selected lane
+ ),
+ 1, 42, 3, 42, 5, 42, 7, 42 // even lanes keep the first operand's values
+ )
+);
__m128i test_mm_maskz_set1_epi16(__mmask8 __M, short __A) {
// CHECK-LABEL: test_mm_maskz_set1_epi16
@@ -4041,6 +4699,15 @@ __m128i test_mm_maskz_set1_epi16(__mmask8 __M, short __A) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_set1_epi16(__M, __A);
}
+TEST_CONSTEXPR( // constant-evaluation check for _mm_maskz_set1_epi16
+ match_v8hi(
+ _mm_maskz_set1_epi16(
+ (__mmask8)0xAA, // odd bits set: odd lanes are selected
+ 42 // scalar expected in every selected lane
+ ),
+ 0, 42, 0, 42, 0, 42, 0, 42 // even lanes are expected to be zero
+ )
+);
__m128i test_mm_permutexvar_epi16(__m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_permutexvar_epi16
// CHECK: @llvm.x86.avx512.permvar.hi.128
More information about the cfe-commits
mailing list