r193267 - Intrinsics: fix extract & insert when index is out of bound.

Manman Ren manman.ren at gmail.com
Wed Oct 23 13:33:14 PDT 2013


Author: mren
Date: Wed Oct 23 15:33:14 2013
New Revision: 193267

URL: http://llvm.org/viewvc/llvm-project?rev=193267&view=rev
Log:
Intrinsics: fix extract & insert when index is out of bound.

Now, all extract & insert intrinsics should apply the correct bitwise AND
mask to the index, so that its higher bits are ignored.

rdar://15250497

Modified:
    cfe/trunk/lib/Headers/avxintrin.h
    cfe/trunk/lib/Headers/smmintrin.h
    cfe/trunk/test/CodeGen/avx-builtins.c
    cfe/trunk/test/CodeGen/sse-builtins.c

Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=193267&r1=193266&r2=193267&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Wed Oct 23 15:33:14 2013
@@ -435,21 +435,21 @@ static __inline int __attribute__((__alw
 _mm256_extract_epi32(__m256i __a, int const __imm)
 {
   __v8si __b = (__v8si)__a;
-  return __b[__imm];
+  return __b[__imm & 7];
 }
 
 static __inline int __attribute__((__always_inline__, __nodebug__))
 _mm256_extract_epi16(__m256i __a, int const __imm)
 {
   __v16hi __b = (__v16hi)__a;
-  return __b[__imm];
+  return __b[__imm & 15];
 }
 
 static __inline int __attribute__((__always_inline__, __nodebug__))
 _mm256_extract_epi8(__m256i __a, int const __imm)
 {
   __v32qi __b = (__v32qi)__a;
-  return __b[__imm];
+  return __b[__imm & 31];
 }
 
 #ifdef __x86_64__
@@ -457,7 +457,7 @@ static __inline long long  __attribute__
 _mm256_extract_epi64(__m256i __a, const int __imm)
 {
   __v4di __b = (__v4di)__a;
-  return __b[__imm];
+  return __b[__imm & 3];
 }
 #endif
 

Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=193267&r1=193266&r2=193267&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Wed Oct 23 15:33:14 2013
@@ -197,7 +197,7 @@ _mm_max_epu32 (__m128i __V1, __m128i __V
 #define _mm_extract_ps(X, N) (__extension__                      \
                               ({ union { int __i; float __f; } __t;  \
                                  __v4sf __a = (__v4sf)(X);       \
-                                 __t.__f = __a[N];                 \
+                                 __t.__f = __a[(N) & 3];                 \
                                  __t.__i;}))
 
 /* Miscellaneous insert and extract macros.  */
@@ -215,14 +215,14 @@ _mm_max_epu32 (__m128i __V1, __m128i __V
                                              
 /* Insert int into packed integer array at index.  */
 #define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
-                                                   __a[(N)] = (I);             \
+                                                   __a[(N) & 15] = (I);             \
                                                    __a;}))
 #define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
-                                                    __a[(N)] = (I);           \
+                                                    __a[(N) & 3] = (I);           \
                                                     __a;}))
 #ifdef __x86_64__
 #define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
-                                                    __a[(N)] = (I);           \
+                                                    __a[(N) & 1] = (I);           \
                                                     __a;}))
 #endif /* __x86_64__ */
 
@@ -231,12 +231,12 @@ _mm_max_epu32 (__m128i __V1, __m128i __V
  */
 #define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
                                                  (int)(unsigned char) \
-                                                     __a[(N)];}))
+                                                     __a[(N) & 15];}))
 #define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
-                                                  __a[(N)];}))
+                                                  __a[(N) & 3];}))
 #ifdef __x86_64__
 #define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
-                                                  __a[(N)];}))
+                                                  __a[(N) & 1];}))
 #endif /* __x86_64 */
 
 /* SSE4 128-bit Packed Integer Comparisons.  */

Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=193267&r1=193266&r2=193267&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Wed Oct 23 15:33:14 2013
@@ -93,3 +93,21 @@ int test_mm_cmpistrz(__m128i A, __m128i
   // CHECK: @llvm.x86.sse42.pcmpistriz128
   return _mm_cmpistrz(A, B, 7);
 }
+
+int test_extract_epi32(__m256i __a) {
+  // CHECK-LABEL: @test_extract_epi32
+  // CHECK: extractelement <8 x i32> %{{.*}}, i32 0
+  return _mm256_extract_epi32(__a, 8);
+}
+
+int test_extract_epi16(__m256i __a) {
+  // CHECK-LABEL: @test_extract_epi16
+  // CHECK: extractelement <16 x i16> %{{.*}}, i32 0
+  return _mm256_extract_epi16(__a, 16);
+}
+
+int test_extract_epi8(__m256i __a) {
+  // CHECK-LABEL: @test_extract_epi8
+  // CHECK: extractelement <32 x i8> %{{.*}}, i32 0
+  return _mm256_extract_epi8(__a, 32);
+}

Modified: cfe/trunk/test/CodeGen/sse-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse-builtins.c?rev=193267&r1=193266&r2=193267&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse-builtins.c Wed Oct 23 15:33:14 2013
@@ -213,3 +213,27 @@ void test_extract_epi16(__m128i __a) {
   // CHECK: extractelement <8 x i16> %{{.*}}, i32 [[x]]
   _mm_extract_epi16(__a, 8);
 }
+
+int test_extract_ps(__m128i __a) {
+  // CHECK-LABEL: @test_extract_ps
+  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
+  return _mm_extract_ps(__a, 4);
+}
+
+int test_extract_epi8(__m128i __a) {
+  // CHECK-LABEL: @test_extract_epi8
+  // CHECK: extractelement <16 x i8> %{{.*}}, i32 0
+  return _mm_extract_epi8(__a, 16);
+}
+
+int test_extract_epi32(__m128i __a) {
+  // CHECK-LABEL: @test_extract_epi32
+  // CHECK: extractelement <4 x i32> %{{.*}}, i32 0
+  return _mm_extract_epi32(__a, 4);
+}
+
+void test_insert_epi32(__m128i __a, int b) {
+  // CHECK-LABEL: @test_insert_epi32
+  // CHECK: insertelement <4 x i32> %{{.*}}, i32 %{{.*}}, i32 0
+   _mm_insert_epi32(__a, b, 4);
+}





More information about the cfe-commits mailing list