[clang] [X86] Add MMX/SSE/AVX PHADD/SUB & HADDPS/D intrinsics to be used in constexpr (PR #156822)
Simon Pilgrim via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 19 06:07:41 PDT 2025
================
@@ -464,36 +464,60 @@ __m256i test_mm256_hadd_epi16(__m256i a, __m256i b) {
// CHECK: call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadd_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi(_mm256_hadd_epi16(
+ (__m256i)(__v16hi){1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16},
+ (__m256i)(__v16hi){17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32}),
+ 3,7,11,15,19,23,27,31,35,39,43,47,51,55,59,63));
__m256i test_mm256_hadd_epi32(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadd_epi32
// CHECK: call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
return _mm256_hadd_epi32(a, b);
}
+TEST_CONSTEXPR(match_v8si(_mm256_hadd_epi32(
+ (__m256i)(__v8si){10, 20, 30, 40, 50, 60, 70, 80},
+ (__m256i)(__v8si){5, 15, 25, 35, 45, 55, 65, 75})
+ 30,70,110,150,20,60,100,140))
__m256i test_mm256_hadds_epi16(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_hadds_epi16
// CHECK:call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
return _mm256_hadds_epi16(a, b);
}
+TEST_CONSTEXPR(match_v16hi( _mm256_hadds_epi16(
+ (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},
+ (__m256i)(__v16hi){32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767},
+ 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767, 32767)))
----------------
RKSimon wrote:
don't use uniform values - it makes it very difficult to check the correct element orders have been kept
also - use some non-saturating values as well
https://github.com/llvm/llvm-project/pull/156822
More information about the cfe-commits
mailing list