[clang] [llvm] Clang: convert `__m64` intrinsics to unconditionally use SSE2 instead of MMX. (PR #96540)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 24 12:30:08 PDT 2024
github-actions[bot] wrote:
<!--LLVM CODE FORMAT COMMENT: {darker}-->
:warning: The Python code formatter, darker, found issues in your code. :warning:
<details>
<summary>
You can test this locally with the following command:
</summary>
``````````bash
darker --check --diff -r af6acd7442646fde56de919964bd52d7bb7922b2...a17a0df1c3551693283dd806b901d3020f33e67f mmx-tests/mmx-tests.py
``````````
</details>
<details>
<summary>
View the diff from darker here.
</summary>
``````````diff
--- mmx-tests.py 2024-06-21 21:25:54.000000 +0000
+++ mmx-tests.py 2024-06-24 19:29:45.332608 +0000
@@ -1,301 +1,1048 @@
#!/usr/bin/python3
import argparse
import sys
+
# This is a list of all intel functions and macros which take or
# return an __m64.
def do_mmx(fn):
- # mmintrin.h
- fn("_mm_cvtsi32_si64", "__m64", ("int", ))
- fn("_mm_cvtsi64_si32", "int", ("__m64", ))
- fn("_mm_cvtsi64_m64", "__m64", ("long long", ), condition='defined(__X86_64__) || defined(__clang__)')
- fn("_mm_cvtm64_si64", "long long", ("__m64", ), condition='defined(__X86_64__) || defined(__clang__)')
- fn("_mm_packs_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_packs_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_packs_pu16", "__m64", ("__m64", "__m64", ))
- fn("_mm_unpackhi_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_unpackhi_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_unpackhi_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_unpacklo_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_unpacklo_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_unpacklo_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_add_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_add_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_add_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_adds_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_adds_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_adds_pu8", "__m64", ("__m64", "__m64", ))
- fn("_mm_adds_pu16", "__m64", ("__m64", "__m64", ))
- fn("_mm_sub_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_sub_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_sub_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_subs_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_subs_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_subs_pu8", "__m64", ("__m64", "__m64", ))
- fn("_mm_subs_pu16", "__m64", ("__m64", "__m64", ))
- fn("_mm_madd_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_mulhi_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_mullo_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_sll_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_slli_pi16", "__m64", ("__m64", "int", ))
- fn("_mm_sll_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_slli_pi32", "__m64", ("__m64", "int", ))
- fn("_mm_sll_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_slli_si64", "__m64", ("__m64", "int", ))
- fn("_mm_sra_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_srai_pi16", "__m64", ("__m64", "int", ))
- fn("_mm_sra_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_srai_pi32", "__m64", ("__m64", "int", ))
- fn("_mm_srl_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_srli_pi16", "__m64", ("__m64", "int", ))
- fn("_mm_srl_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_srli_pi32", "__m64", ("__m64", "int", ))
- fn("_mm_srl_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_srli_si64", "__m64", ("__m64", "int", ))
- fn("_mm_and_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_andnot_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_or_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_xor_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_cmpeq_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_cmpeq_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_cmpeq_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_cmpgt_pi8", "__m64", ("__m64", "__m64", ))
- fn("_mm_cmpgt_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_cmpgt_pi32", "__m64", ("__m64", "__m64", ))
- fn("_mm_setzero_si64", "__m64", ())
- fn("_mm_set_pi32", "__m64", ("int", "int", ))
- fn("_mm_set_pi16", "__m64", ("short", "short", "short", "short", ))
- fn("_mm_set_pi8", "__m64", ("char", "char", "char", "char", "char", "char", "char", "char", ))
- fn("_mm_set1_pi32", "__m64", ("int", ))
- fn("_mm_set1_pi16", "__m64", ("short", ))
- fn("_mm_set1_pi8", "__m64", ("char", ))
- fn("_mm_setr_pi32", "__m64", ("int", "int", ))
- fn("_mm_setr_pi16", "__m64", ("short", "short", "short", "short", ))
- fn("_mm_setr_pi8", "__m64", ("char", "char", "char", "char", "char", "char", "char", "char", ))
-
- # xmmintrin.h
- fn("_mm_cvtps_pi32", "__m64", ("__m128", ))
- fn("_mm_cvt_ps2pi", "__m64", ("__m128", ))
- fn("_mm_cvttps_pi32", "__m64", ("__m128", ))
- fn("_mm_cvtt_ps2pi", "__m64", ("__m128", ))
- fn("_mm_cvtpi32_ps", "__m128", ("__m128", "__m64", ))
- fn("_mm_cvt_pi2ps", "__m128", ("__m128", "__m64", ))
- fn("_mm_loadh_pi", "__m128", ("__m128", "const __m64 *", ))
- fn("_mm_loadl_pi", "__m128", ("__m128", "const __m64 *", ))
- fn("_mm_storeh_pi", "void", ("__m64 *", "__m128", ))
- fn("_mm_storel_pi", "void", ("__m64 *", "__m128", ))
- fn("_mm_stream_pi", "void", ("__m64 *", "__m64", ))
- fn("_mm_max_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_max_pu8", "__m64", ("__m64", "__m64", ))
- fn("_mm_min_pi16", "__m64", ("__m64", "__m64", ))
- fn("_mm_min_pu8", "__m64", ("__m64", "__m64", ))
- fn("_mm_movemask_pi8", "int", ("__m64", ))
- fn("_mm_mulhi_pu16", "__m64", ("__m64", "__m64", ))
- fn("_mm_maskmove_si64", "void", ("__m64", "__m64", "char *", ))
- fn("_mm_avg_pu8", "__m64", ("__m64", "__m64", ))
- fn("_mm_avg_pu16", "__m64", ("__m64", "__m64", ))
- fn("_mm_sad_pu8", "__m64", ("__m64", "__m64", ))
- fn("_mm_cvtpi16_ps", "__m128", ("__m64", ))
- fn("_mm_cvtpu16_ps", "__m128", ("__m64", ))
- fn("_mm_cvtpi8_ps", "__m128", ("__m64", ))
- fn("_mm_cvtpu8_ps", "__m128", ("__m64", ))
- fn("_mm_cvtpi32x2_ps", "__m128", ("__m64", "__m64", ))
- fn("_mm_cvtps_pi16", "__m64", ("__m128", ))
- fn("_mm_cvtps_pi8", "__m64", ("__m128", ))
-
- fn("_mm_extract_pi16", "int", ("__m64", "int", ), imm_range=(0, 3))
- fn("_mm_insert_pi16", "__m64", ("__m64", "int", "int", ), imm_range=(0, 3))
- fn("_mm_shuffle_pi16", "__m64", ("__m64", "int", ), imm_range=(0, 255))
-
- # emmintrin.h
- fn("_mm_cvtpd_pi32", "__m64", ("__m128d", ))
- fn("_mm_cvttpd_pi32", "__m64", ("__m128d", ))
- fn("_mm_cvtpi32_pd", "__m128d", ("__m64", ))
- fn("_mm_add_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_mul_su32", "__m64", ("__m64", "__m64", ))
- fn("_mm_sub_si64", "__m64", ("__m64", "__m64", ))
- fn("_mm_set_epi64", "__m128i", ("__m64", "__m64", ))
- fn("_mm_set1_epi64", "__m128i", ("__m64", ))
- fn("_mm_setr_epi64", "__m128i", ("__m64", "__m64", ))
- fn("_mm_movepi64_pi64", "__m64", ("__m128i", ))
- fn("_mm_movpi64_epi64", "__m128i", ("__m64", ))
-
- # tmmintrin.h
- fn("_mm_abs_pi8", "__m64", ("__m64", ), target='ssse3')
- fn("_mm_abs_pi16", "__m64", ("__m64", ), target='ssse3')
- fn("_mm_abs_pi32", "__m64", ("__m64", ), target='ssse3')
- fn("_mm_hadd_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_hadd_pi32", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_hadds_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_hsub_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_hsub_pi32", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_hsubs_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_maddubs_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_mulhrs_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_shuffle_pi8", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_sign_pi8", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_sign_pi16", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_sign_pi32", "__m64", ("__m64", "__m64", ), target='ssse3')
- fn("_mm_alignr_pi8", "__m64", ("__m64", "__m64", "int", ), imm_range=(0, 18), target='ssse3')
+ # mmintrin.h
+ fn("_mm_cvtsi32_si64", "__m64", ("int",))
+ fn("_mm_cvtsi64_si32", "int", ("__m64",))
+ fn(
+ "_mm_cvtsi64_m64",
+ "__m64",
+ ("long long",),
+ condition="defined(__X86_64__) || defined(__clang__)",
+ )
+ fn(
+ "_mm_cvtm64_si64",
+ "long long",
+ ("__m64",),
+ condition="defined(__X86_64__) || defined(__clang__)",
+ )
+ fn(
+ "_mm_packs_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_packs_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_packs_pu16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_unpackhi_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_unpackhi_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_unpackhi_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_unpacklo_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_unpacklo_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_unpacklo_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_add_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_add_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_add_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_adds_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_adds_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_adds_pu8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_adds_pu16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_sub_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_sub_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_sub_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_subs_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_subs_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_subs_pu8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_subs_pu16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_madd_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_mulhi_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_mullo_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_sll_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_slli_pi16",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_sll_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_slli_pi32",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_sll_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_slli_si64",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_sra_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_srai_pi16",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_sra_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_srai_pi32",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_srl_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_srli_pi16",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_srl_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_srli_pi32",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_srl_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_srli_si64",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_and_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_andnot_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_or_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_xor_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cmpeq_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cmpeq_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cmpeq_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cmpgt_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cmpgt_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cmpgt_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn("_mm_setzero_si64", "__m64", ())
+ fn(
+ "_mm_set_pi32",
+ "__m64",
+ (
+ "int",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_set_pi16",
+ "__m64",
+ (
+ "short",
+ "short",
+ "short",
+ "short",
+ ),
+ )
+ fn(
+ "_mm_set_pi8",
+ "__m64",
+ (
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ ),
+ )
+ fn("_mm_set1_pi32", "__m64", ("int",))
+ fn("_mm_set1_pi16", "__m64", ("short",))
+ fn("_mm_set1_pi8", "__m64", ("char",))
+ fn(
+ "_mm_setr_pi32",
+ "__m64",
+ (
+ "int",
+ "int",
+ ),
+ )
+ fn(
+ "_mm_setr_pi16",
+ "__m64",
+ (
+ "short",
+ "short",
+ "short",
+ "short",
+ ),
+ )
+ fn(
+ "_mm_setr_pi8",
+ "__m64",
+ (
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ "char",
+ ),
+ )
+
+ # xmmintrin.h
+ fn("_mm_cvtps_pi32", "__m64", ("__m128",))
+ fn("_mm_cvt_ps2pi", "__m64", ("__m128",))
+ fn("_mm_cvttps_pi32", "__m64", ("__m128",))
+ fn("_mm_cvtt_ps2pi", "__m64", ("__m128",))
+ fn(
+ "_mm_cvtpi32_ps",
+ "__m128",
+ (
+ "__m128",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_cvt_pi2ps",
+ "__m128",
+ (
+ "__m128",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_loadh_pi",
+ "__m128",
+ (
+ "__m128",
+ "const __m64 *",
+ ),
+ )
+ fn(
+ "_mm_loadl_pi",
+ "__m128",
+ (
+ "__m128",
+ "const __m64 *",
+ ),
+ )
+ fn(
+ "_mm_storeh_pi",
+ "void",
+ (
+ "__m64 *",
+ "__m128",
+ ),
+ )
+ fn(
+ "_mm_storel_pi",
+ "void",
+ (
+ "__m64 *",
+ "__m128",
+ ),
+ )
+ fn(
+ "_mm_stream_pi",
+ "void",
+ (
+ "__m64 *",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_max_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_max_pu8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_min_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_min_pu8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn("_mm_movemask_pi8", "int", ("__m64",))
+ fn(
+ "_mm_mulhi_pu16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_maskmove_si64",
+ "void",
+ (
+ "__m64",
+ "__m64",
+ "char *",
+ ),
+ )
+ fn(
+ "_mm_avg_pu8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_avg_pu16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_sad_pu8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn("_mm_cvtpi16_ps", "__m128", ("__m64",))
+ fn("_mm_cvtpu16_ps", "__m128", ("__m64",))
+ fn("_mm_cvtpi8_ps", "__m128", ("__m64",))
+ fn("_mm_cvtpu8_ps", "__m128", ("__m64",))
+ fn(
+ "_mm_cvtpi32x2_ps",
+ "__m128",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn("_mm_cvtps_pi16", "__m64", ("__m128",))
+ fn("_mm_cvtps_pi8", "__m64", ("__m128",))
+
+ fn(
+ "_mm_extract_pi16",
+ "int",
+ (
+ "__m64",
+ "int",
+ ),
+ imm_range=(0, 3),
+ )
+ fn(
+ "_mm_insert_pi16",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ "int",
+ ),
+ imm_range=(0, 3),
+ )
+ fn(
+ "_mm_shuffle_pi16",
+ "__m64",
+ (
+ "__m64",
+ "int",
+ ),
+ imm_range=(0, 255),
+ )
+
+ # emmintrin.h
+ fn("_mm_cvtpd_pi32", "__m64", ("__m128d",))
+ fn("_mm_cvttpd_pi32", "__m64", ("__m128d",))
+ fn("_mm_cvtpi32_pd", "__m128d", ("__m64",))
+ fn(
+ "_mm_add_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_mul_su32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_sub_si64",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn(
+ "_mm_set_epi64",
+ "__m128i",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn("_mm_set1_epi64", "__m128i", ("__m64",))
+ fn(
+ "_mm_setr_epi64",
+ "__m128i",
+ (
+ "__m64",
+ "__m64",
+ ),
+ )
+ fn("_mm_movepi64_pi64", "__m64", ("__m128i",))
+ fn("_mm_movpi64_epi64", "__m128i", ("__m64",))
+
+ # tmmintrin.h
+ fn("_mm_abs_pi8", "__m64", ("__m64",), target="ssse3")
+ fn("_mm_abs_pi16", "__m64", ("__m64",), target="ssse3")
+ fn("_mm_abs_pi32", "__m64", ("__m64",), target="ssse3")
+ fn(
+ "_mm_hadd_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_hadd_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_hadds_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_hsub_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_hsub_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_hsubs_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_maddubs_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_mulhrs_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_shuffle_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_sign_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_sign_pi16",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_sign_pi32",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ ),
+ target="ssse3",
+ )
+ fn(
+ "_mm_alignr_pi8",
+ "__m64",
+ (
+ "__m64",
+ "__m64",
+ "int",
+ ),
+ imm_range=(0, 18),
+ target="ssse3",
+ )
+
# Generate a file full of wrapper functions for each of the above mmx
# functions.
#
# If use_xmm is set, pass/return arguments as __m128 rather than as
# __m64.
def define_wrappers(prefix, use_xmm=True, header=False):
- if header:
- print('#pragma once')
-
- print('#include <immintrin.h>')
- if use_xmm and not header:
- print('#define m128_to_m64(x) ((__m64)((__v2di)(x))[0])')
- print('#define m64_to_m128(x) ((__m128)(__v2di){(long long)(__m64)(x), 0})')
-
- def fn(name, ret_ty, arg_tys, imm_range=None, target=None, condition=None):
- if condition:
- print(f'#if {condition}')
- convert_ret = False
- if use_xmm and ret_ty == '__m64':
- ret_ty = '__v2di'
- convert_ret = True
-
- if target:
- attr = f'__attribute__((target("{target}"))) '
- else:
- attr = ''
-
- if imm_range:
- arg_tys = arg_tys[:-1]
- def translate_type(t):
- if use_xmm and t == '__m64':
- return '__m128'
- return t
- def translate_arg(t, a):
- if use_xmm and t == '__m64':
- return f'm128_to_m64({a})'
- return a
-
- arg_decl = ', '.join(f'{translate_type(v[1])} arg_{v[0]}' for v in enumerate(arg_tys)) or 'void'
- call_args = ', '.join(translate_arg(v[1], f'arg_{v[0]}') for v in enumerate(arg_tys))
-
- def create_fn(suffix, extraarg):
- if header:
- print(f'{ret_ty} {prefix}_{name}{suffix}({arg_decl});')
- else:
- print(f'{attr}{ret_ty} {prefix}_{name}{suffix}({arg_decl})')
- if use_xmm and convert_ret:
- print(f'{{ return ({ret_ty})m64_to_m128({name}({call_args}{extraarg})); }}')
+ if header:
+ print("#pragma once")
+
+ print("#include <immintrin.h>")
+ if use_xmm and not header:
+ print("#define m128_to_m64(x) ((__m64)((__v2di)(x))[0])")
+ print("#define m64_to_m128(x) ((__m128)(__v2di){(long long)(__m64)(x), 0})")
+
+ def fn(name, ret_ty, arg_tys, imm_range=None, target=None, condition=None):
+ if condition:
+ print(f"#if {condition}")
+ convert_ret = False
+ if use_xmm and ret_ty == "__m64":
+ ret_ty = "__v2di"
+ convert_ret = True
+
+ if target:
+ attr = f'__attribute__((target("{target}"))) '
else:
- print(f'{{ return {name}({call_args}{extraarg}); }}')
-
- if imm_range:
- for i in range(imm_range[0], imm_range[1]+1):
- create_fn(f'_{i}', f', {i}')
- else:
- create_fn('', '')
- if condition:
- print('#endif')
-
- do_mmx(fn)
+ attr = ""
+
+ if imm_range:
+ arg_tys = arg_tys[:-1]
+
+ def translate_type(t):
+ if use_xmm and t == "__m64":
+ return "__m128"
+ return t
+
+ def translate_arg(t, a):
+ if use_xmm and t == "__m64":
+ return f"m128_to_m64({a})"
+ return a
+
+ arg_decl = (
+ ", ".join(f"{translate_type(v[1])} arg_{v[0]}" for v in enumerate(arg_tys))
+ or "void"
+ )
+ call_args = ", ".join(
+ translate_arg(v[1], f"arg_{v[0]}") for v in enumerate(arg_tys)
+ )
+
+ def create_fn(suffix, extraarg):
+ if header:
+ print(f"{ret_ty} {prefix}_{name}{suffix}({arg_decl});")
+ else:
+ print(f"{attr}{ret_ty} {prefix}_{name}{suffix}({arg_decl})")
+ if use_xmm and convert_ret:
+ print(
+ f"{{ return ({ret_ty})m64_to_m128({name}({call_args}{extraarg})); }}"
+ )
+ else:
+ print(f"{{ return {name}({call_args}{extraarg}); }}")
+
+ if imm_range:
+ for i in range(imm_range[0], imm_range[1] + 1):
+ create_fn(f"_{i}", f", {i}")
+ else:
+ create_fn("", "")
+ if condition:
+ print("#endif")
+
+ do_mmx(fn)
# Create a C file that tests an "orig" set of wrappers against a "new"
# set of wrappers.
def define_tests(use_xmm=False):
- def fn(name, ret_ty, arg_tys, imm_range=None, target=None, condition=None):
- if condition:
- print(f'#if {condition}')
- arg_decl = ', '.join(f'{v[1]} arg_{v[0]}' for v in enumerate(arg_tys)) or 'void'
- print(f' // {ret_ty} {name}({arg_decl});')
-
- if imm_range:
- for i in range(imm_range[0], imm_range[1]+1):
- fn(name + f'_{i}', ret_ty, arg_tys[:-1], target=target)
- return
-
- convert_pre = convert_post = ''
- if use_xmm and ret_ty == '__m64':
- convert_pre = 'm128_to_m64('
- convert_post = ')'
-
- args=[]
- loops=[]
- printf_fmts = []
- printf_args = []
- for arg_ty in arg_tys:
- v=len(loops)
- if arg_ty in ('char', 'short'):
- loops.append(f' for(int l{v} = 0; l{v} < arraysize(short_vals); ++l{v}) {{')
- args.append(f'({arg_ty})short_vals[l{v}]')
- printf_fmts.append('%016x')
- printf_args.append(f'short_vals[l{v}]')
- elif arg_ty in ('int', 'long long'):
- loops.append(f' for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{')
- args.append(f'({arg_ty})mmx_vals[l{v}]')
- printf_fmts.append('%016llx')
- printf_args.append(f'mmx_vals[l{v}]')
- elif arg_ty == '__m64':
- loops.append(f' for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{')
- if use_xmm:
- loops.append(f' for(int l{v+1} = 0; l{v+1} < arraysize(padding_mmx_vals); ++l{v+1}) {{')
- args.append(f'(__m128)(__m128i){{mmx_vals[l{v}], padding_mmx_vals[l{v+1}]}}')
- printf_fmts.append('(__m128i){%016llx, %016llx}')
- printf_args.append(f'mmx_vals[l{v}], padding_mmx_vals[l{v+1}]')
+ def fn(name, ret_ty, arg_tys, imm_range=None, target=None, condition=None):
+ if condition:
+ print(f"#if {condition}")
+ arg_decl = ", ".join(f"{v[1]} arg_{v[0]}" for v in enumerate(arg_tys)) or "void"
+ print(f" // {ret_ty} {name}({arg_decl});")
+
+ if imm_range:
+ for i in range(imm_range[0], imm_range[1] + 1):
+ fn(name + f"_{i}", ret_ty, arg_tys[:-1], target=target)
+ return
+
+ convert_pre = convert_post = ""
+ if use_xmm and ret_ty == "__m64":
+ convert_pre = "m128_to_m64("
+ convert_post = ")"
+
+ args = []
+ loops = []
+ printf_fmts = []
+ printf_args = []
+ for arg_ty in arg_tys:
+ v = len(loops)
+ if arg_ty in ("char", "short"):
+ loops.append(
+ f" for(int l{v} = 0; l{v} < arraysize(short_vals); ++l{v}) {{"
+ )
+ args.append(f"({arg_ty})short_vals[l{v}]")
+ printf_fmts.append("%016x")
+ printf_args.append(f"short_vals[l{v}]")
+ elif arg_ty in ("int", "long long"):
+ loops.append(
+ f" for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{"
+ )
+ args.append(f"({arg_ty})mmx_vals[l{v}]")
+ printf_fmts.append("%016llx")
+ printf_args.append(f"mmx_vals[l{v}]")
+ elif arg_ty == "__m64":
+ loops.append(
+ f" for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{"
+ )
+ if use_xmm:
+ loops.append(
+ f" for(int l{v+1} = 0; l{v+1} < arraysize(padding_mmx_vals); ++l{v+1}) {{"
+ )
+ args.append(
+ f"(__m128)(__m128i){{mmx_vals[l{v}], padding_mmx_vals[l{v+1}]}}"
+ )
+ printf_fmts.append("(__m128i){%016llx, %016llx}")
+ printf_args.append(f"mmx_vals[l{v}], padding_mmx_vals[l{v+1}]")
+ else:
+ args.append(f"({arg_ty})mmx_vals[l{v}]")
+ printf_fmts.append("%016llx")
+ printf_args.append(f"mmx_vals[l{v}]")
+ elif arg_ty in ("__m128", "__m128i", "__m128d"):
+ loops.append(
+ f" for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{"
+ )
+ loops.append(
+ f" for(int l{v+1} = 0; l{v+1} < arraysize(mmx_vals); ++l{v+1}) {{"
+ )
+ args.append(f"({arg_ty})(__m128i){{mmx_vals[l{v}], mmx_vals[l{v+1}]}}")
+ printf_fmts.append("(__m128i){%016llx, %016llx}")
+ printf_args.append(f"mmx_vals[l{v}], mmx_vals[l{v+1}]")
+ elif arg_ty == "const __m64 *":
+ loops.append(
+ f" for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{\n"
+ + f" mem.m64 = (__m64)mmx_vals[l{v}];"
+ )
+ args.append(f"&mem.m64")
+ printf_fmts.append("&mem.m64 /* %016llx */")
+ printf_args.append(f"(long long)mem.m64")
+ else:
+ print(" // -> UNSUPPORTED")
+ return
+
+ printf_fmt_str = '"' + ", ".join(printf_fmts) + '"'
+ if printf_args:
+ printf_arg_str = ", " + ",".join(printf_args)
else:
- args.append(f'({arg_ty})mmx_vals[l{v}]')
- printf_fmts.append('%016llx')
- printf_args.append(f'mmx_vals[l{v}]')
- elif arg_ty in ('__m128', '__m128i', '__m128d'):
- loops.append(f' for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{')
- loops.append(f' for(int l{v+1} = 0; l{v+1} < arraysize(mmx_vals); ++l{v+1}) {{')
- args.append(f'({arg_ty})(__m128i){{mmx_vals[l{v}], mmx_vals[l{v+1}]}}')
- printf_fmts.append('(__m128i){%016llx, %016llx}')
- printf_args.append(f'mmx_vals[l{v}], mmx_vals[l{v+1}]')
- elif arg_ty == 'const __m64 *':
- loops.append(f' for(int l{v} = 0; l{v} < arraysize(mmx_vals); ++l{v}) {{\n' +
- f' mem.m64 = (__m64)mmx_vals[l{v}];')
- args.append(f'&mem.m64')
- printf_fmts.append('&mem.m64 /* %016llx */')
- printf_args.append(f'(long long)mem.m64')
- else:
- print(' // -> UNSUPPORTED')
- return
-
- printf_fmt_str = '"' + ', '.join(printf_fmts) + '"'
- if printf_args:
- printf_arg_str = ', ' + ','.join(printf_args)
- else:
- printf_arg_str = ''
-
- print('\n'.join(loops))
- print(f'''
+ printf_arg_str = ""
+
+ print("\n".join(loops))
+ print(
+ f"""
clear_exc_flags();
{ret_ty} orig_res = {convert_pre}orig_{name}({", ".join(args)}){convert_post};
int orig_exc = get_exc_flags();
clear_exc_flags();
{ret_ty} new_res = {convert_pre}new_{name}({", ".join(args)}){convert_post};
int new_exc = get_exc_flags();
check_mismatch("{name}", orig_exc, new_exc, &orig_res, &new_res, sizeof(orig_res), {printf_fmt_str}{printf_arg_str});
-''')
- print(' }\n' * len(loops))
- print()
- if condition:
- print('#endif')
-
- do_mmx(fn)
-
-
-parser = argparse.ArgumentParser(description='Generate mmx test code.')
-parser.add_argument('--kind', choices=['wrapper', 'wrapper_h', 'test'])
-parser.add_argument('--wrapper-prefix', default='orig')
-parser.add_argument('--use-xmm', action='store_true')
+"""
+ )
+ print(" }\n" * len(loops))
+ print()
+ if condition:
+ print("#endif")
+
+ do_mmx(fn)
+
+
+parser = argparse.ArgumentParser(description="Generate mmx test code.")
+parser.add_argument("--kind", choices=["wrapper", "wrapper_h", "test"])
+parser.add_argument("--wrapper-prefix", default="orig")
+parser.add_argument("--use-xmm", action="store_true")
args = parser.parse_args()
-if args.kind == 'wrapper':
- define_wrappers(args.wrapper_prefix, use_xmm=args.use_xmm, header=False)
-elif args.kind == 'wrapper_h':
- define_wrappers(args.wrapper_prefix, use_xmm=args.use_xmm, header=True)
-elif args.kind == 'test':
- define_tests(use_xmm=args.use_xmm)
+if args.kind == "wrapper":
+ define_wrappers(args.wrapper_prefix, use_xmm=args.use_xmm, header=False)
+elif args.kind == "wrapper_h":
+ define_wrappers(args.wrapper_prefix, use_xmm=args.use_xmm, header=True)
+elif args.kind == "test":
+ define_tests(use_xmm=args.use_xmm)
``````````
</details>
https://github.com/llvm/llvm-project/pull/96540
More information about the cfe-commits mailing list