r343388 - [X86] Add more of the icc unaligned load/store to/from 128 bit vector intrinsics
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Sat Sep 29 10:49:42 PDT 2018
Author: ctopper
Date: Sat Sep 29 10:49:42 2018
New Revision: 343388
URL: http://llvm.org/viewvc/llvm-project?rev=343388&view=rev
Log:
[X86] Add more of the icc unaligned load/store to/from 128 bit vector intrinsics
Summary:
This patch adds
_mm_loadu_si32
_mm_loadu_si16
_mm_storeu_si64
_mm_storeu_si32
_mm_storeu_si16
We already had _mm_load_si64.
Reviewers: spatel, RKSimon
Reviewed By: RKSimon
Subscribers: cfe-commits
Differential Revision: https://reviews.llvm.org/D52665
Modified:
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/test/CodeGen/sse2-builtins.c
Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=343388&r1=343387&r2=343388&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Sat Sep 29 10:49:42 2018
@@ -1675,7 +1675,49 @@ _mm_loadu_si64(void const *__a)
long long __v;
} __attribute__((__packed__, __may_alias__));
long long __u = ((struct __loadu_si64*)__a)->__v;
- return __extension__ (__m128i)(__v2di){__u, 0L};
+ return __extension__ (__m128i)(__v2di){__u, 0LL};
+}
+
+/// Loads a 32-bit integer value to the low element of a 128-bit integer
+/// vector and clears the upper element.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
+///
+/// \param __a
+/// A pointer to a 32-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \returns A 128-bit vector of [4 x i32] containing the loaded value.
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_loadu_si32(void const *__a)
+{
+ struct __loadu_si32 {
+ int __v;
+ } __attribute__((__packed__, __may_alias__));
+ int __u = ((struct __loadu_si32*)__a)->__v;
+ return __extension__ (__m128i)(__v4si){__u, 0, 0, 0};
+}
+
+/// Loads a 16-bit integer value to the low element of a 128-bit integer
+/// vector and clears the upper element.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic does not correspond to a specific instruction.
+///
+/// \param __a
+/// A pointer to a 16-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \returns A 128-bit vector of [8 x i16] containing the loaded value.
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_loadu_si16(void const *__a)
+{
+ struct __loadu_si16 {
+ short __v;
+ } __attribute__((__packed__, __may_alias__));
+ short __u = ((struct __loadu_si16*)__a)->__v;
+ return __extension__ (__m128i)(__v8hi){__u, 0, 0, 0, 0, 0, 0, 0};
}
/// Loads a 64-bit double-precision value to the low element of a
@@ -3993,6 +4035,69 @@ _mm_storeu_si128(__m128i *__p, __m128i _
((struct __storeu_si128*)__p)->__v = __b;
}
+/// Stores a 64-bit integer value from the low element of a 128-bit integer
+/// vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVQ / MOVQ </c> instruction.
+///
+/// \param __p
+/// A pointer to a 64-bit memory location. The address of the memory
+/// location does not have to be algned.
+/// \param __b
+/// A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si64(void const *__p, __m128i __b)
+{
+ struct __storeu_si64 {
+ long long __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si64*)__p)->__v = ((__v2di)__b)[0];
+}
+
+/// Stores a 32-bit integer value from the low element of a 128-bit integer
+/// vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic corresponds to the <c> VMOVD / MOVD </c> instruction.
+///
+/// \param __p
+/// A pointer to a 32-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \param __b
+/// A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si32(void const *__p, __m128i __b)
+{
+ struct __storeu_si32 {
+ int __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si32*)__p)->__v = ((__v4si)__b)[0];
+}
+
+/// Stores a 16-bit integer value from the low element of a 128-bit integer
+/// vector.
+///
+/// \headerfile <x86intrin.h>
+///
+/// This intrinsic does not correspond to a specific instruction.
+///
+/// \param __p
+/// A pointer to a 16-bit memory location. The address of the memory
+/// location does not have to be aligned.
+/// \param __b
+/// A 128-bit integer vector containing the value to be stored.
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm_storeu_si16(void const *__p, __m128i __b)
+{
+ struct __storeu_si16 {
+ short __v;
+ } __attribute__((__packed__, __may_alias__));
+ ((struct __storeu_si16*)__p)->__v = ((__v8hi)__b)[0];
+}
+
/// Moves bytes selected by the mask from the first operand to the
/// specified unaligned memory location. When a mask bit is 1, the
/// corresponding byte is written, otherwise it is not written.
Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/sse2-builtins.c?rev=343388&r1=343387&r2=343388&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/sse2-builtins.c Sat Sep 29 10:49:42 2018
@@ -721,6 +721,30 @@ __m128i test_mm_loadu_si64(void const* A
return _mm_loadu_si64(A);
}
+__m128i test_mm_loadu_si32(void const* A) {
+ // CHECK-LABEL: test_mm_loadu_si32
+ // CHECK: load i32, i32* %{{.*}}, align 1{{$}}
+ // CHECK: insertelement <4 x i32> undef, i32 %{{.*}}, i32 0
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 1
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 2
+ // CHECK: insertelement <4 x i32> %{{.*}}, i32 0, i32 3
+ return _mm_loadu_si32(A);
+}
+
+__m128i test_mm_loadu_si16(void const* A) {
+ // CHECK-LABEL: test_mm_loadu_si16
+ // CHECK: load i16, i16* %{{.*}}, align 1{{$}}
+ // CHECK: insertelement <8 x i16> undef, i16 %{{.*}}, i32 0
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 1
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 2
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 3
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 4
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 5
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 6
+ // CHECK: insertelement <8 x i16> %{{.*}}, i16 0, i32 7
+ return _mm_loadu_si16(A);
+}
+
__m128i test_mm_madd_epi16(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_madd_epi16
// CHECK: call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
@@ -1351,6 +1375,30 @@ void test_mm_storeu_si128(__m128i* A, __
_mm_storeu_si128(A, B);
}
+void test_mm_storeu_si64(void* A, __m128i B) {
+ // CHECK-LABEL: test_mm_storeu_si64
+ // CHECK: [[EXT:%.*]] = extractelement <2 x i64> %{{.*}}, i32 0
+ // CHECK: store i64 [[EXT]], i64* %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_si64(A, B);
+}
+
+void test_mm_storeu_si32(void* A, __m128i B) {
+ // CHECK-LABEL: test_mm_storeu_si32
+ // CHECK: [[EXT:%.*]] = extractelement <4 x i32> %{{.*}}, i32 0
+ // CHECK: store i32 [[EXT]], i32* %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_si32(A, B);
+}
+
+void test_mm_storeu_si16(void* A, __m128i B) {
+ // CHECK-LABEL: test_mm_storeu_si16
+ // CHECK: [[EXT:%.*]] = extractelement <8 x i16> %{{.*}}, i32 0
+ // CHECK: store i16 [[EXT]], i16* %{{.*}}, align 1{{$}}
+ // CHECK-NEXT: ret void
+ _mm_storeu_si16(A, B);
+}
+
void test_mm_stream_pd(double *A, __m128d B) {
// CHECK-LABEL: test_mm_stream_pd
// CHECK: store <2 x double> %{{.*}}, <2 x double>* %{{.*}}, align 16, !nontemporal
More information about the cfe-commits
mailing list