r336643 - [X86] Lowering integer truncation intrinsics to native IR

Mikhail Dvoretckii via cfe-commits cfe-commits at lists.llvm.org
Tue Jul 10 01:22:44 PDT 2018


Author: mike.dvoretsky
Date: Tue Jul 10 01:22:44 2018
New Revision: 336643

URL: http://llvm.org/viewvc/llvm-project?rev=336643&view=rev
Log:
[X86] Lowering integer truncation intrinsics to native IR

This patch lowers the _mm[256|512]_cvtepi{64|32|16}_epi{32|16|8} intrinsics to
native IR in cases where the result's length is less than 128 bits.

The resulting IR for 256-bit inputs is folded into VPMOV instructions, while for
128-bit inputs the vpshufb (or, in the 64-to-32-bit case, vinsertps)
instructions are generated instead

Differential Revision: https://reviews.llvm.org/D48712

Modified:
    cfe/trunk/lib/Headers/avx512vlbwintrin.h
    cfe/trunk/lib/Headers/avx512vlintrin.h
    cfe/trunk/test/CodeGen/avx512vl-builtins.c
    cfe/trunk/test/CodeGen/avx512vlbw-builtins.c

Modified: cfe/trunk/lib/Headers/avx512vlbwintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlbwintrin.h?rev=336643&r1=336642&r2=336643&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlbwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlbwintrin.h Tue Jul 10 01:22:44 2018
@@ -1496,10 +1496,10 @@ _mm256_maskz_cvtusepi16_epi8 (__mmask16
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi16_epi8 (__m128i __A) {
-
-  return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
-               (__v16qi) _mm_setzero_si128(),
-               (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v8hi)__A, __v8qi),
+      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+      12, 13, 14, 15);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128

Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=336643&r1=336642&r2=336643&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Tue Jul 10 01:22:44 2018
@@ -31,6 +31,10 @@
 #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
 #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
 
+typedef short __v2hi __attribute__((__vector_size__(4)));
+typedef char __v4qi __attribute__((__vector_size__(4)));
+typedef char __v2qi __attribute__((__vector_size__(2)));
+
 /* Integer compare */
 
 #define _mm_cmpeq_epi32_mask(A, B) \
@@ -7341,9 +7345,9 @@ _mm256_mask_cvtusepi64_storeu_epi16 (voi
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi32_epi8 (__m128i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
-              (__v16qi)_mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -7371,9 +7375,10 @@ _mm_mask_cvtepi32_storeu_epi8 (void * __
 static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi32_epi8 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
-              (__v16qi)_mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v8si)__A, __v8qi),
+      (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
+      12, 13, 14, 15);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS256
@@ -7400,9 +7405,9 @@ _mm256_mask_cvtepi32_storeu_epi8 (void *
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi32_epi16 (__m128i __A)
 {
-  return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
-              (__v8hi) _mm_setzero_si128 (),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -7456,9 +7461,9 @@ _mm256_mask_cvtepi32_storeu_epi16 (void
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_epi8 (__m128i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
-              (__v16qi) _mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
+      3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -7485,9 +7490,9 @@ _mm_mask_cvtepi64_storeu_epi8 (void * __
 static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_epi8 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
-              (__v16qi) _mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS256
@@ -7514,9 +7519,8 @@ _mm256_mask_cvtepi64_storeu_epi8 (void *
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_epi32 (__m128i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
-              (__v4si)_mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -7571,9 +7575,9 @@ _mm256_mask_cvtepi64_storeu_epi32 (void
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_epi16 (__m128i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
-              (__v8hi) _mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
+      3, 3, 3, 3);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS128
@@ -7601,9 +7605,9 @@ _mm_mask_cvtepi64_storeu_epi16 (void * _
 static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_epi16 (__m256i __A)
 {
-  return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
-              (__v8hi)_mm_undefined_si128(),
-              (__mmask8) -1);
+  return (__m128i)__builtin_shufflevector(
+      __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
+      2, 3, 4, 5, 6, 7);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS256

Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=336643&r1=336642&r2=336643&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Tue Jul 10 01:22:44 2018
@@ -8503,7 +8503,8 @@ void test_mm256_mask_cvtusepi64_storeu_e
 
 __m128i test_mm_cvtepi32_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi32_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.db.128
+  // CHECK: trunc <4 x i32> %{{.*}} to <4 x i8>
+  // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   return _mm_cvtepi32_epi8(__A); 
 }
 
@@ -8527,7 +8528,8 @@ void test_mm_mask_cvtepi32_storeu_epi8(v
 
 __m128i test_mm256_cvtepi32_epi8(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi32_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.db.256
+  // CHECK: trunc <8 x i32> %{{.*}} to <8 x i8>
+  // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm256_cvtepi32_epi8(__A); 
 }
 
@@ -8551,7 +8553,8 @@ void test_mm256_mask_cvtepi32_storeu_epi
 
 __m128i test_mm_cvtepi32_epi16(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi32_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.dw.128
+  // CHECK: trunc <4 x i32> %{{.*}} to <4 x i16>
+  // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm_cvtepi32_epi16(__A); 
 }
 
@@ -8599,7 +8602,8 @@ void test_mm256_mask_cvtepi32_storeu_epi
 
 __m128i test_mm_cvtepi64_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.qb.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i8>
+  // CHECK: shufflevector <2 x i8> %{{.*}}, <2 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
   return _mm_cvtepi64_epi8(__A); 
 }
 
@@ -8623,7 +8627,8 @@ void test_mm_mask_cvtepi64_storeu_epi8(v
 
 __m128i test_mm256_cvtepi64_epi8(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi64_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.qb.256
+  // CHECK: trunc <4 x i64> %{{.*}} to <4 x i8>
+  // CHECK: shufflevector <4 x i8> %{{.*}}, <4 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
   return _mm256_cvtepi64_epi8(__A); 
 }
 
@@ -8647,7 +8652,8 @@ void test_mm256_mask_cvtepi64_storeu_epi
 
 __m128i test_mm_cvtepi64_epi32(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi32
-  // CHECK: @llvm.x86.avx512.mask.pmov.qd.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i32>
+  // CHECK: shufflevector <2 x i32> %{{.*}}, <2 x i32> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   return _mm_cvtepi64_epi32(__A); 
 }
 
@@ -8697,7 +8703,8 @@ void test_mm256_mask_cvtepi64_storeu_epi
 
 __m128i test_mm_cvtepi64_epi16(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi64_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.qw.128
+  // CHECK: trunc <2 x i64> %{{.*}} to <2 x i16>
+  // CHECK: shufflevector <2 x i16> %{{.*}}, <2 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 3, i32 3, i32 3>
   return _mm_cvtepi64_epi16(__A); 
 }
 
@@ -8721,7 +8728,8 @@ void test_mm_mask_cvtepi64_storeu_epi16(
 
 __m128i test_mm256_cvtepi64_epi16(__m256i __A) {
   // CHECK-LABEL: @test_mm256_cvtepi64_epi16
-  // CHECK: @llvm.x86.avx512.mask.pmov.qw.256
+  // CHECK: trunc <4 x i64> %{{.*}} to <4 x i16>
+  // CHECK: shufflevector <4 x i16> %{{.*}}, <4 x i16> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   return _mm256_cvtepi64_epi16(__A); 
 }
 

Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vlbw-builtins.c?rev=336643&r1=336642&r2=336643&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Tue Jul 10 01:22:44 2018
@@ -1792,7 +1792,8 @@ __m128i test_mm256_maskz_cvtusepi16_epi8
 
 __m128i test_mm_cvtepi16_epi8(__m128i __A) {
   // CHECK-LABEL: @test_mm_cvtepi16_epi8
-  // CHECK: @llvm.x86.avx512.mask.pmov.wb.128
+  // CHECK: trunc <8 x i16> %{{.*}} to <8 x i8>
+  // CHECK: shufflevector <8 x i8> %{{.*}}, <8 x i8> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   return _mm_cvtepi16_epi8(__A); 
 }
 




More information about the cfe-commits mailing list