[cfe-commits] r72979 - in /cfe/trunk/lib/Headers: emmintrin.h mmintrin.h xmmintrin.h
Eli Friedman
eli.friedman at gmail.com
Fri Jun 5 19:13:04 PDT 2009
Author: efriedma
Date: Fri Jun 5 21:13:04 2009
New Revision: 72979
URL: http://llvm.org/viewvc/llvm-project?rev=72979&view=rev
Log:
Misc fixes to MMX/SSE intrinsics: a few small bug fixes, and getting rid
of calls to builtins for constructs which can be expressed directly.
Modified:
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/mmintrin.h
cfe/trunk/lib/Headers/xmmintrin.h
Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=72979&r1=72978&r2=72979&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Fri Jun 5 21:13:04 2009
@@ -40,7 +40,8 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_add_sd(__m128d a, __m128d b)
{
- return __builtin_ia32_addsd(a, b);
+ a[0] += b[0];
+ return a;
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
@@ -52,7 +53,8 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_sub_sd(__m128d a, __m128d b)
{
- return __builtin_ia32_subsd(a, b);
+ a[0] -= b[0];
+ return a;
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
@@ -64,7 +66,8 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_mul_sd(__m128d a, __m128d b)
{
- return __builtin_ia32_mulsd(a, b);
+ a[0] *= b[0];
+ return a;
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
@@ -76,7 +79,8 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_div_sd(__m128d a, __m128d b)
{
- return __builtin_ia32_divsd(a, b);
+ a[0] /= b[0];
+ return a;
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
@@ -125,25 +129,25 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_and_pd(__m128d a, __m128d b)
{
- return __builtin_ia32_andpd(a, b);
+ return (__m128d)((__v4si)a & (__v4si)b); /* bitwise AND; cast back to the __m128d return type */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_andnot_pd(__m128d a, __m128d b)
{
- return __builtin_ia32_andnpd(a, b);
+ return (__m128d)(~(__v4si)a & (__v4si)b); /* (~a) & b; cast back to the __m128d return type */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_or_pd(__m128d a, __m128d b)
{
- return __builtin_ia32_orpd(a, b);
+ return (__m128d)((__v4si)a | (__v4si)b); /* bitwise OR; cast back to the __m128d return type */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_xor_pd(__m128d a, __m128d b)
{
- return __builtin_ia32_xorpd(a, b);
+ return (__m128d)((__v4si)a ^ (__v4si)b); /* bitwise XOR; cast back to the __m128d return type */
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
@@ -383,7 +387,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtsd_ss(__m128 a, __m128d b)
{
- return __builtin_ia32_cvtsd2ss(a, b);
+ a[0] = b[0];
+ return a;
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
@@ -395,7 +400,8 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtss_sd(__m128d a, __m128 b)
{
- return __builtin_ia32_cvtss2sd(a, b);
+ a[0] = b[0];
+ return a;
}
static inline __m128i __attribute__((__always_inline__, __nodebug__))
@@ -407,7 +413,7 @@
static inline int __attribute__((__always_inline__, __nodebug__))
_mm_cvttsd_si32(__m128d a)
{
- return __builtin_ia32_cvttsd2si(a);
+ return a[0];
}
static inline __m64 __attribute__((__always_inline__, __nodebug__))
@@ -747,25 +753,25 @@
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_and_si128(__m128i a, __m128i b)
{
- return __builtin_ia32_pand128(a, b);
+ return a & b;
}
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_andnot_si128(__m128i a, __m128i b)
{
- return __builtin_ia32_pandn128(a, b);
+ return ~a & b;
}
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_or_si128(__m128i a, __m128i b)
{
- return __builtin_ia32_por128(a, b);
+ return a | b;
}
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_xor_si128(__m128i a, __m128i b)
{
- return __builtin_ia32_pxor128(a, b);
+ return a ^ b;
}
static inline __m128i __attribute__((__always_inline__, __nodebug__))
@@ -934,7 +940,8 @@
static inline __m128d __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi64_sd(__m128d a, long long b)
{
- return __builtin_ia32_cvtsi642sd(a, b);
+ a[0] = b;
+ return a;
}
static inline long long __attribute__((__always_inline__, __nodebug__))
@@ -946,7 +953,7 @@
static inline long long __attribute__((__always_inline__, __nodebug__))
_mm_cvttsd_si64(__m128d a)
{
- return __builtin_ia32_cvttsd2si64(a);
+ return a[0];
}
#endif
@@ -1181,7 +1188,9 @@
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_insert_epi16(__m128i a, int b, int imm)
{
- return (__m128i)__builtin_ia32_vec_set_v8hi((__v8hi)a, b, imm);
+ __v8hi c = (__v8hi)a;
+ c[imm & 7] = b; /* mask keeps the lane index within the 8 halfword lanes */
+ return (__m128i)c; /* explicit cast, as the replaced builtin call had */
}
static inline int __attribute__((__always_inline__, __nodebug__))
@@ -1257,7 +1266,7 @@
static inline __m128i __attribute__((__always_inline__, __nodebug__))
_mm_move_epi64(__m128i a)
{
- return (__m128i){ a[0], 0 };
+ return __builtin_shufflevector(a, (__m128i){ 0 }, 0, 2);
}
static inline __m128d __attribute__((__always_inline__, __nodebug__))
Modified: cfe/trunk/lib/Headers/mmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/mmintrin.h?rev=72979&r1=72978&r2=72979&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/mmintrin.h (original)
+++ cfe/trunk/lib/Headers/mmintrin.h Fri Jun 5 21:13:04 2009
@@ -415,13 +415,13 @@
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set1_pi16(short __s)
{
- return (__m64)(__v4hi){ __s };
+ return (__m64)(__v4hi){ __s, __s, __s, __s };
}
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_set1_pi8(char __b)
{
- return (__m64)(__v8qi){ __b };
+ return (__m64)(__v8qi){ __b, __b, __b, __b, __b, __b, __b, __b };
}
static inline __m64 __attribute__((__always_inline__, __nodebug__))
Modified: cfe/trunk/lib/Headers/xmmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=72979&r1=72978&r2=72979&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/xmmintrin.h (original)
+++ cfe/trunk/lib/Headers/xmmintrin.h Fri Jun 5 21:13:04 2009
@@ -38,7 +38,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_add_ss(__m128 a, __m128 b)
{
- return __builtin_ia32_addss(a, b);
+ a[0] += b[0];
+ return a;
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
@@ -50,7 +51,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_sub_ss(__m128 a, __m128 b)
{
- return __builtin_ia32_subss(a, b);
+ a[0] -= b[0];
+ return a;
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
@@ -62,7 +64,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_mul_ss(__m128 a, __m128 b)
{
- return __builtin_ia32_mulss(a, b);
+ a[0] *= b[0];
+ return a;
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
@@ -74,7 +77,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_div_ss(__m128 a, __m128 b)
{
- return __builtin_ia32_divss(a, b);
+ a[0] /= b[0];
+ return a;
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
@@ -146,25 +150,29 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_and_ps(__m128 a, __m128 b)
{
- return __builtin_ia32_andps(a, b);
+ typedef int __v4si __attribute__((__vector_size__(16)));
+ return (__m128)((__v4si)a & (__v4si)b);
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_andnot_ps(__m128 a, __m128 b)
{
- return __builtin_ia32_andnps(a, b);
+ typedef int __v4si __attribute__((__vector_size__(16)));
+ return (__m128)(~(__v4si)a & (__v4si)b);
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_or_ps(__m128 a, __m128 b)
{
- return __builtin_ia32_orps(a, b);
+ typedef int __v4si __attribute__((__vector_size__(16)));
+ return (__m128)((__v4si)a | (__v4si)b);
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_xor_ps(__m128 a, __m128 b)
{
- return __builtin_ia32_xorps(a, b);
+ typedef int __v4si __attribute__((__vector_size__(16)));
+ return (__m128)((__v4si)a ^ (__v4si)b); /* plain XOR, matching the xorps builtin it replaces */
}
static inline __m128 __attribute__((__always_inline__, __nodebug__))
@@ -389,12 +397,16 @@
return __builtin_ia32_cvtss2si(a);
}
+#ifdef __x86_64__
+
static inline long long __attribute__((__always_inline__, __nodebug__))
_mm_cvtss_si64(__m128 a)
{
return __builtin_ia32_cvtss2si64(a);
}
+#endif
+
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_cvtps_pi32(__m128 a)
{
@@ -404,13 +416,13 @@
static inline int __attribute__((__always_inline__, __nodebug__))
_mm_cvttss_si32(__m128 a)
{
- return __builtin_ia32_cvttss2si(a);
+ return a[0];
}
static inline long long __attribute__((__always_inline__, __nodebug__))
_mm_cvttss_si64(__m128 a)
{
- return __builtin_ia32_cvttss2si64(a);
+ return a[0];
}
static inline __m64 __attribute__((__always_inline__, __nodebug__))
@@ -422,7 +434,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi32_ss(__m128 a, int b)
{
- return __builtin_ia32_cvtsi2ss(a, b);
+ a[0] = b;
+ return a;
}
#ifdef __x86_64__
@@ -430,7 +443,8 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_cvtsi64_ss(__m128 a, long long b)
{
- return __builtin_ia32_cvtsi642ss(a, b);
+ a[0] = b;
+ return a;
}
#endif
@@ -456,6 +470,13 @@
static inline __m128 __attribute__((__always_inline__, __nodebug__))
_mm_loadl_pi(__m128 a, __m64 const *p)
{
+#if 0
+ // FIXME: This should work, but gives really crappy code at the moment
+ __m128 b;
+ b[0] = *(float*)p;
+ b[1] = *((float*)p+1);
+ return __builtin_shufflevector(a, b, 0, 1, 4, 5);
+#endif
return __builtin_ia32_loadlps(a, (__v2si *)p);
}
@@ -604,26 +625,17 @@
static inline int __attribute__((__always_inline__, __nodebug__))
_mm_extract_pi16(__m64 a, int n)
{
- /* FIXME:
- * This should force n to be an immediate.
- * This does not use the PEXTRW instruction. From looking at the LLVM source, the
- instruction doesn't seem to be hooked up.
- * The code could probably be made better :)
- */
__v4hi b = (__v4hi)a;
- return b[(n == 0) ? 0 : (n == 1 ? 1 : (n == 2 ? 2 : 3))];
+ return (unsigned short)b[n & 3];
}
-/* FIXME: Implement this. We could add a __builtin_insertelement function that's similar to
- the already existing __builtin_shufflevector.
-*/
-/*
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_insert_pi16(__m64 a, int d, int n)
{
- return (__m64){ 0LL };
+ __v4hi b = (__v4hi)a;
+ b[n & 3] = d; /* mask keeps the lane index within the 4 halfword lanes */
+ return (__m64)b; /* explicit cast back to the declared __m64 return type */
}
-*/
static inline __m64 __attribute__((__always_inline__, __nodebug__))
_mm_max_pi16(__m64 a, __m64 b)
More information about the cfe-commits
mailing list