[cfe-commits] r110096 - in /cfe/trunk: include/clang/Basic/BuiltinsX86.def test/CodeGen/builtins-x86.c
Chris Lattner
clattner at apple.com
Mon Aug 2 20:41:11 PDT 2010
On Aug 2, 2010, at 6:57 PM, Bruno Cardoso Lopes wrote:
> Author: bruno
> Date: Mon Aug 2 20:57:18 2010
> New Revision: 110096
>
> URL: http://llvm.org/viewvc/llvm-project?rev=110096&view=rev
> Log:
> Support x86 AVX 256-bit instructions built-ins. Right now support all of them, but
> as soon as we properly codegen the simple vector operations, remove the
> unnecessary built-ins/intrinsics from clang and llvm. Also add tests for the new
> built-ins
Hey Bruno,
Is __builtin_ia32_addpd256 just a simple add? We've preferred to not add builtins for operations that can be trivially matched from LLVM IR. The Clang header for AVX should just define the xmmintrin.h functions in terms of "+" on extended vectors (for example). The other headers in clang/lib/Headers are implemented the same way.
We still need to have builtins for really crazy stuff of course.
-Chris
>
>
> Modified:
> cfe/trunk/include/clang/Basic/BuiltinsX86.def
> cfe/trunk/test/CodeGen/builtins-x86.c
>
> Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=110096&r1=110095&r2=110096&view=diff
> ==============================================================================
> --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
> +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Mon Aug 2 20:57:18 2010
> @@ -325,4 +325,105 @@
> BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "")
> BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "")
> BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLic", "")
> +
> +// AVX
> +BUILTIN(__builtin_ia32_addpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_addps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_andpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_andps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_andnpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_andnps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_divpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_divps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_mulpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_mulps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_orpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_orps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_subpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_subps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_xorpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_xorps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "")
> +BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "")
> +BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "")
> +BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "")
> +BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dc", "")
> +BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fc", "")
> +BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fc", "")
> +BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dc", "")
> +BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fc", "")
> +BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dc", "")
> +BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fc", "")
> +BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dc", "")
> +BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fc", "")
> +BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8ic", "")
> +BUILTIN(__builtin_ia32_cvtdq2pd256, "V4dV4i", "")
> +BUILTIN(__builtin_ia32_cvtdq2ps256, "V8fV8i", "")
> +BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "")
> +BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "")
> +BUILTIN(__builtin_ia32_cvtps2pd256, "V4dV4f", "")
> +BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "")
> +BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "")
> +BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "")
> +BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dc", "")
> +BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fc", "")
> +BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8ic", "")
> +BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dc", "")
> +BUILTIN(__builtin_ia32_vpermilps, "V4fV4fc", "")
> +BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dc", "")
> +BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fc", "")
> +BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dc", "")
> +BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fc", "")
> +BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4ic", "")
> +BUILTIN(__builtin_ia32_movshdup256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_movsldup256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_movddup256, "V4dV4d", "")
> +BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "")
> +BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_sqrtps_nr256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_rsqrtps_nr256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "")
> +BUILTIN(__builtin_ia32_roundpd256, "V4dV4di", "")
> +BUILTIN(__builtin_ia32_roundps256, "V8fV8fi", "")
> +BUILTIN(__builtin_ia32_unpckhpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_unpcklpd256, "V4dV4dV4d", "")
> +BUILTIN(__builtin_ia32_unpckhps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_unpcklps256, "V8fV8fV8f", "")
> +BUILTIN(__builtin_ia32_si256_si, "V8iV4i", "")
> +BUILTIN(__builtin_ia32_ps256_ps, "V8fV4f", "")
> +BUILTIN(__builtin_ia32_pd256_pd, "V4dV2d", "")
> +BUILTIN(__builtin_ia32_si_si256, "V4iV8i", "")
> +BUILTIN(__builtin_ia32_ps_ps256, "V4fV8f", "")
> +BUILTIN(__builtin_ia32_pd_pd256, "V2dV4d", "")
> +BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "")
> +BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "")
> +BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "")
> +BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "")
> +BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "")
> +BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "")
> +BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "")
> +BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "")
> +BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "")
> +BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "")
> +BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "")
> +BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "")
> +BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "")
> +BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "")
> +BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "")
> +BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "")
> +BUILTIN(__builtin_ia32_movmskps256, "iV8f", "")
> +
> #undef BUILTIN
>
> Modified: cfe/trunk/test/CodeGen/builtins-x86.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-x86.c?rev=110096&r1=110095&r2=110096&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/builtins-x86.c (original)
> +++ cfe/trunk/test/CodeGen/builtins-x86.c Mon Aug 2 20:57:18 2010
> @@ -24,6 +24,13 @@
> typedef float V4f __attribute__((vector_size(16)));
> typedef double V2d __attribute__((vector_size(16)));
>
> +// 256-bit
> +typedef signed int V8i __attribute__((vector_size(32)));
> +typedef signed long long V4LLi __attribute__((vector_size(32)));
> +
> +typedef double V4d __attribute__((vector_size(32)));
> +typedef float V8f __attribute__((vector_size(32)));
> +
> void f0() {
> signed char tmp_c;
> // unsigned char tmp_Uc;
> @@ -77,6 +84,12 @@
> V4f tmp_V4f;
> V2d tmp_V2d;
>
> + // 256-bit
> + V4d tmp_V4d;
> + V8f tmp_V8f;
> + V4LLi tmp_V4LLi;
> + V8i tmp_V8i;
> +
> tmp_i = __builtin_ia32_comieq(tmp_V4f, tmp_V4f);
> tmp_i = __builtin_ia32_comilt(tmp_V4f, tmp_V4f);
> tmp_i = __builtin_ia32_comile(tmp_V4f, tmp_V4f);
> @@ -365,6 +378,103 @@
> tmp_V2d = __builtin_ia32_roundpd(tmp_V2d, imm_i_0_16);
> tmp_V4f = __builtin_ia32_insertps128(tmp_V4f, tmp_V4f, tmp_i);
> #endif
> -}
> -
>
> + tmp_V4d = __builtin_ia32_addpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_addps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_addsubpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_addsubps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_andpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_andps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_andnpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_andnps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_divpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_divps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_haddpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_hsubps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_hsubpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_haddps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_maxpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_maxps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_minpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_minps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_mulpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_mulps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_orpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_orps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_subpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_subps256(tmp_V8f, tmp_V8f);
> + tmp_V4d = __builtin_ia32_xorpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_xorps256(tmp_V8f, tmp_V8f);
> + tmp_V2d = __builtin_ia32_vpermilvarpd(tmp_V2d, tmp_V2LLi);
> + tmp_V4f = __builtin_ia32_vpermilvarps(tmp_V4f, tmp_V4i);
> + tmp_V4d = __builtin_ia32_vpermilvarpd256(tmp_V4d, tmp_V4LLi);
> + tmp_V8f = __builtin_ia32_vpermilvarps256(tmp_V8f, tmp_V8i);
> + tmp_V4d = __builtin_ia32_blendpd256(tmp_V4d, tmp_V4d, 0x7);
> + tmp_V8f = __builtin_ia32_blendps256(tmp_V8f, tmp_V8f, 0x7);
> + tmp_V4d = __builtin_ia32_blendvpd256(tmp_V4d, tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_blendvps256(tmp_V8f, tmp_V8f, tmp_V8f);
> + tmp_V8f = __builtin_ia32_dpps256(tmp_V8f, tmp_V8f, 0x7);
> + tmp_V4d = __builtin_ia32_shufpd256(tmp_V4d, tmp_V4d, 0x7);
> + tmp_V8f = __builtin_ia32_shufps256(tmp_V8f, tmp_V8f, 0x7);
> + tmp_V4d = __builtin_ia32_cmppd256(tmp_V4d, tmp_V4d, 0);
> + tmp_V8f = __builtin_ia32_cmpps256(tmp_V8f, tmp_V8f, 0);
> + tmp_V2d = __builtin_ia32_vextractf128_pd256(tmp_V4d, 0x7);
> + tmp_V4f = __builtin_ia32_vextractf128_ps256(tmp_V8f, 0x7);
> + tmp_V4i = __builtin_ia32_vextractf128_si256(tmp_V8i, 0x7);
> + tmp_V4d = __builtin_ia32_cvtdq2pd256(tmp_V4i);
> + tmp_V8f = __builtin_ia32_cvtdq2ps256(tmp_V8i);
> + tmp_V4f = __builtin_ia32_cvtpd2ps256(tmp_V4d);
> + tmp_V8i = __builtin_ia32_cvtps2dq256(tmp_V8f);
> + tmp_V4d = __builtin_ia32_cvtps2pd256(tmp_V4f);
> + tmp_V4i = __builtin_ia32_cvttpd2dq256(tmp_V4d);
> + tmp_V4i = __builtin_ia32_cvtpd2dq256(tmp_V4d);
> + tmp_V8i = __builtin_ia32_cvttps2dq256(tmp_V8f);
> + tmp_V4d = __builtin_ia32_vperm2f128_pd256(tmp_V4d, tmp_V4d, 0x7);
> + tmp_V8f = __builtin_ia32_vperm2f128_ps256(tmp_V8f, tmp_V8f, 0x7);
> + tmp_V8i = __builtin_ia32_vperm2f128_si256(tmp_V8i, tmp_V8i, 0x7);
> + tmp_V2d = __builtin_ia32_vpermilpd(tmp_V2d, 0x7);
> + tmp_V4f = __builtin_ia32_vpermilps(tmp_V4f, 0x7);
> + tmp_V4d = __builtin_ia32_vpermilpd256(tmp_V4d, 0x7);
> + tmp_V8f = __builtin_ia32_vpermilps256(tmp_V8f, 0x7);
> + tmp_V4d = __builtin_ia32_vinsertf128_pd256(tmp_V4d, tmp_V2d, 0x7);
> + tmp_V8f = __builtin_ia32_vinsertf128_ps256(tmp_V8f, tmp_V4f, 0x7);
> + tmp_V8i = __builtin_ia32_vinsertf128_si256(tmp_V8i, tmp_V4i, 0x7);
> + tmp_V8f = __builtin_ia32_movshdup256(tmp_V8f);
> + tmp_V8f = __builtin_ia32_movsldup256(tmp_V8f);
> + tmp_V4d = __builtin_ia32_movddup256(tmp_V4d);
> + tmp_V4d = __builtin_ia32_sqrtpd256(tmp_V4d);
> + tmp_V8f = __builtin_ia32_sqrtps256(tmp_V8f);
> + tmp_V8f = __builtin_ia32_sqrtps_nr256(tmp_V8f);
> + tmp_V8f = __builtin_ia32_rsqrtps256(tmp_V8f);
> + tmp_V8f = __builtin_ia32_rsqrtps_nr256(tmp_V8f);
> + tmp_V8f = __builtin_ia32_rcpps256(tmp_V8f);
> + tmp_V4d = __builtin_ia32_roundpd256(tmp_V4d, tmp_i);
> + tmp_V8f = __builtin_ia32_roundps256(tmp_V8f, tmp_i);
> + tmp_V4d = __builtin_ia32_unpckhpd256(tmp_V4d, tmp_V4d);
> + tmp_V4d = __builtin_ia32_unpcklpd256(tmp_V4d, tmp_V4d);
> + tmp_V8f = __builtin_ia32_unpckhps256(tmp_V8f, tmp_V8f);
> + tmp_V8f = __builtin_ia32_unpcklps256(tmp_V8f, tmp_V8f);
> + tmp_V8i = __builtin_ia32_si256_si(tmp_V4i);
> + tmp_V8f = __builtin_ia32_ps256_ps(tmp_V4f);
> + tmp_V4d = __builtin_ia32_pd256_pd(tmp_V2d);
> + tmp_V4i = __builtin_ia32_si_si256(tmp_V8i);
> + tmp_V4f = __builtin_ia32_ps_ps256(tmp_V8f);
> + tmp_V2d = __builtin_ia32_pd_pd256(tmp_V4d);
> + tmp_i = __builtin_ia32_vtestzpd(tmp_V2d, tmp_V2d);
> + tmp_i = __builtin_ia32_vtestcpd(tmp_V2d, tmp_V2d);
> + tmp_i = __builtin_ia32_vtestnzcpd(tmp_V2d, tmp_V2d);
> + tmp_i = __builtin_ia32_vtestzps(tmp_V4f, tmp_V4f);
> + tmp_i = __builtin_ia32_vtestcps(tmp_V4f, tmp_V4f);
> + tmp_i = __builtin_ia32_vtestnzcps(tmp_V4f, tmp_V4f);
> + tmp_i = __builtin_ia32_vtestzpd256(tmp_V4d, tmp_V4d);
> + tmp_i = __builtin_ia32_vtestcpd256(tmp_V4d, tmp_V4d);
> + tmp_i = __builtin_ia32_vtestnzcpd256(tmp_V4d, tmp_V4d);
> + tmp_i = __builtin_ia32_vtestzps256(tmp_V8f, tmp_V8f);
> + tmp_i = __builtin_ia32_vtestcps256(tmp_V8f, tmp_V8f);
> + tmp_i = __builtin_ia32_vtestnzcps256(tmp_V8f, tmp_V8f);
> + tmp_i = __builtin_ia32_ptestz256(tmp_V4LLi, tmp_V4LLi);
> + tmp_i = __builtin_ia32_ptestc256(tmp_V4LLi, tmp_V4LLi);
> + tmp_i = __builtin_ia32_ptestnzc256(tmp_V4LLi, tmp_V4LLi);
> + tmp_i = __builtin_ia32_movmskpd256(tmp_V4d);
> + tmp_i = __builtin_ia32_movmskps256(tmp_V8f);
> +}
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits mailing list