r271218 - [X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

Simon Pilgrim via cfe-commits cfe-commits at lists.llvm.org
Mon May 30 10:55:26 PDT 2016


Author: rksimon
Date: Mon May 30 12:55:25 2016
New Revision: 271218

URL: http://llvm.org/viewvc/llvm-project?rev=271218&view=rev
Log:
[X86][SSE] _mm_store1_ps/_mm_store1_pd should require an aligned pointer

According to the GCC headers, the Intel intrinsics docs and MSDN codegen, _mm_store1_pd (and its _mm_store_pd1 equivalent) should use an aligned pointer - the clang headers are the only implementation I can find that assumes non-aligned stores (by storing with _mm_storeu_pd).

Additionally, according to the Intel intrinsics docs and MSDN codegen, _mm_store1_ps (_mm_store_ps1) requires a similarly aligned pointer.

This patch raises the alignment requirements to match the other implementations by calling _mm_store_ps/_mm_store_pd instead.

I've also added the missing _mm_store_pd1 intrinsic (which maps to _mm_store1_pd like _mm_store_ps1 does to _mm_store1_ps).

As a followup I'll update the llvm fast-isel tests to match this codegen.

Differential Revision: http://reviews.llvm.org/D20617

Modified:
    cfe/trunk/lib/Headers/emmintrin.h
    cfe/trunk/lib/Headers/xmmintrin.h
    cfe/trunk/test/CodeGen/sse2-builtins.c

Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=271218&r1=271217&r2=271218&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Mon May 30 12:55:25 2016
@@ -588,19 +588,22 @@ _mm_store_sd(double *__dp, __m128d __a)
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
+_mm_store_pd(double *__dp, __m128d __a)
+{
+  *(__m128d*)__dp = __a;
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS
 _mm_store1_pd(double *__dp, __m128d __a)
 {
-  struct __mm_store1_pd_struct {
-    double __u[2];
-  } __attribute__((__packed__, __may_alias__));
-  ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0];
-  ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0];
+  __a = __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
+  _mm_store_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_pd(double *__dp, __m128d __a)
+_mm_store_pd1(double *__dp, __m128d __a)
 {
-  *(__m128d *)__dp = __a;
+  return _mm_store1_pd(__dp, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/xmmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=271218&r1=271217&r2=271218&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/xmmintrin.h (original)
+++ cfe/trunk/lib/Headers/xmmintrin.h Mon May 30 12:55:25 2016
@@ -1593,22 +1593,22 @@ _mm_storeu_ps(float *__p, __m128 __a)
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store1_ps(float *__p, __m128 __a)
+_mm_store_ps(float *__p, __m128 __a)
 {
-  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
-  _mm_storeu_ps(__p, __a);
+  *(__m128*)__p = __a;
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps1(float *__p, __m128 __a)
+_mm_store1_ps(float *__p, __m128 __a)
 {
-    return _mm_store1_ps(__p, __a);
+  __a = __builtin_shufflevector((__v4sf)__a, (__v4sf)__a, 0, 0, 0, 0);
+  _mm_store_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
-_mm_store_ps(float *__p, __m128 __a)
+_mm_store_ps1(float *__p, __m128 __a)
 {
-  *(__m128 *)__p = __a;
+  return _mm_store1_ps(__p, __a);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS

Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=271218&r1=271217&r2=271218&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Mon May 30 12:55:25 2016
@@ -1205,6 +1205,13 @@ void test_mm_store_pd(double* A, __m128d
   _mm_store_pd(A, B);
 }
 
+void test_mm_store_pd1(double* x, __m128d y) {
+  // CHECK-LABEL: test_mm_store_pd1
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* {{.*}}, align 16
+  _mm_store_pd1(x, y);
+}
+
 void test_mm_store_sd(double* A, __m128d B) {
   // CHECK-LABEL: test_mm_store_sd
   // CHECK: extractelement <2 x double> %{{.*}}, i32 0
@@ -1220,9 +1227,8 @@ void test_mm_store_si128(__m128i* A, __m
 
 void test_mm_store1_pd(double* x, __m128d y) {
   // CHECK-LABEL: test_mm_store1_pd
-  // CHECK: extractelement <2 x double> %{{.*}}, i32 0
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
-  // CHECK: store {{.*}} double* {{.*}}, align 1{{$}}
+  // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> zeroinitializer
+  // CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16
   _mm_store1_pd(x, y);
 }
 




More information about the cfe-commits mailing list