[clang] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 VPTERNLOGD/VPTERNLOGQ intrinsics to be used in constexpr (PR #158703)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Sep 22 21:20:52 PDT 2025
================
@@ -6246,40 +6246,191 @@ __m512i test_mm512_ternarylogic_epi32(__m512i __A, __m512i __B, __m512i __C) {
// CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 240)
return _mm512_ternarylogic_epi32(__A, __B, __C, _MM_TERNLOG_A);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // A ? B : C
+ 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC, 0xB, 0xC));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // A | B | C
+ 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_mask_ternarylogic_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_mask_ternarylogic_epi32
// CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 204)
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_ternarylogic_epi32(__A, __U, __B, __C, _MM_TERNLOG_B);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x1, 0x0, 0x2, 0x0, 0x3, 0x0, 0x4, 0x0,
+ 0x5, 0x0, 0x6, 0x0, 0x7, 0x0, 0x8, 0x0})),
+ (__mmask16)0xA55A,
+ ((__m512i)((__v16si){0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10, 0x11,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19})),
+ ((__m512i)((__v16si){0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
+ 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10})),
+ (unsigned char)0xCA), // A ? B : C
+ 0x1, 0x2, 0x2, 0x4, 0x6, 0x0, 0x3, 0x0, 0x8, 0x0, 0xD, 0x0, 0x7, 0xE, 0x8, 0x10));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ ((__m512i)((__v16si){0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
+ 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF})),
+ (__mmask16)0x0F0F,
+ ((__m512i)((__v16si){0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8,
+ 0x1, 0x2, 0x4, 0x8, 0x1, 0x2, 0x4, 0x8})),
+ ((__m512i)((__v16si){0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80,
+ 0x10, 0x20, 0x40, 0x80, 0x10, 0x20, 0x40, 0x80})),
+ (unsigned char)0xFE), // A | B | C
+ 0x11, 0x23, 0x46, 0x8B, 0x4, 0x5, 0x6, 0x7, 0x19, 0x2B, 0x4E, 0x8B, 0xC, 0xD, 0xE, 0xF));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_mask_ternarylogic_epi32(
+ ((__m512i)((__v16si){0xF, 0x7, 0x3, 0x1, 0xF, 0x7, 0x3, 0x1,
+ 0xFF, 0xF, 0xF0, 0xAA, 0x55, 0xCC, 0x33, 0xFF})),
+ (__mmask16)0xAAAA,
+ ((__m512i)((__v16si){0xE, 0x7, 0x2, 0x1, 0xF, 0x0, 0x3, 0x0,
+ 0xF, 0xF0, 0xFF, 0x55, 0x55, 0x33, 0x33, 0xF})),
+ ((__m512i)((__v16si){0xD, 0x7, 0x0, 0x1, 0xF, 0x7, 0x0, 0x1,
+ 0xF0, 0xF, 0xF, 0xFF, 0xF, 0xCC, 0x33, 0xF0})),
+ (unsigned char)0x80), // A & B & C
+ 0xF, 0x7, 0x3, 0x1, 0xF, 0x0, 0x3, 0x0, 0xFF, 0x0, 0xF0, 0x0, 0x55, 0x0, 0x33, 0x0));
__m512i test_mm512_maskz_ternarylogic_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_maskz_ternarylogic_epi32
// CHECK: @llvm.x86.avx512.pternlog.d.512({{.*}}, i32 170)
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> zeroinitializer
return _mm512_maskz_ternarylogic_epi32(__U, __A, __B, __C, _MM_TERNLOG_C);
}
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0x3333,
+ ((__m512i)((__v16si){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ ((__m512i)((__v16si){0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB, 0xB})),
+ ((__m512i)((__v16si){0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC, 0xC})),
+ (unsigned char)0xCA), // A ? B : C
+ 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0, 0xB, 0xC, 0x0, 0x0));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0xCCCC,
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0xFE), // A | B | C
+ 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF, 0x0, 0x0, 0xF, 0xF));
+TEST_CONSTEXPR(match_v16si(
+ _mm512_maskz_ternarylogic_epi32(
+ (__mmask16)0x5555,
+ ((__m512i)((__v16si){0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9, 0x9})),
+ ((__m512i)((__v16si){0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4, 0x4})),
+ ((__m512i)((__v16si){0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2, 0x2})),
+ (unsigned char)0x80), // A & B & C
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0));
__m512i test_mm512_ternarylogic_epi64(__m512i __A, __m512i __B, __m512i __C) {
// CHECK-LABEL: test_mm512_ternarylogic_epi64
// CHECK: @llvm.x86.avx512.pternlog.q.512({{.*}}, i32 192)
return _mm512_ternarylogic_epi64(__A, __B, __C, _MM_TERNLOG_A & _MM_TERNLOG_B);
}
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888})),
+ ((__m512i)((__v8di){0xAAAA, 0xBBBB, 0xCCCC, 0xDDDD, 0xEEEE, 0xFFFF, 0x1111, 0x2222})),
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, -0x1, 0x0, -0x1, 0x0})),
+ (unsigned char)0xD8), // C ? B : A
+ 0xAAAA, 0x2222, 0xCCCC, 0x4444, 0xEEEE, 0x6666, 0x1111, 0x8888));
+TEST_CONSTEXPR(match_v8di(
+ _mm512_ternarylogic_epi64(
+ ((__m512i)((__v8di){-0x1, 0x0, -0x1, 0x0, 0xF0F0, 0xFF, -0x5555555555555556, 0x5555555555555555})),
+ ((__m512i)((__v8di){0x1234, 0xFFFF, 0xFF, 0xF0F, 0x3333, 0xFF00, -0x5555555555555556, -0x0F0F0F0F0F0F0F10})),
+ ((__m512i)((__v8di){0xFFFF, 0x1234, 0xF0F, 0xFF00, 0xF0F0, 0x3333, 0x5555555555555555, 0x0F0F0F0F0F0F0F0})),
+ (unsigned char)0x8F), // ~A | (B & C)
+ 0x1234, -0x1, 0xF, -0x1, -0xC0C1, -0x100, 0x5555555555555555, -0x5505050505050506));
+TEST_CONSTEXPR(match_v8di(
----------------
kimsh02 wrote:
Failing test case where handling in old evaluator passes but not in the new one.
https://github.com/llvm/llvm-project/pull/158703
More information about the cfe-commits
mailing list