r318025 - [x86][AVX512] Lowering shuffle i/f intrinsics to LLVM IR
Jina Nahias via cfe-commits
cfe-commits at lists.llvm.org
Mon Nov 13 01:15:31 PST 2017
Author: jina.nahias
Date: Mon Nov 13 01:15:31 2017
New Revision: 318025
URL: http://llvm.org/viewvc/llvm-project?rev=318025&view=rev
Log:
[x86][AVX512] Lowering shuffle i/f intrinsics to LLVM IR
This patch, together with a matching llvm patch (https://reviews.llvm.org/D38671), implements the lowering of X86 shuffle i/f intrinsics to IR.
Differential Revision: https://reviews.llvm.org/D38672
Change-Id: I9b3c2f2b34323bd9ccb21d0c1832f848b88ec047
Modified:
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/test/CodeGen/avx512f-builtins.c
cfe/trunk/test/CodeGen/avx512vl-builtins.c
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=318025&r1=318024&r2=318025&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Mon Nov 13 01:15:31 2017
@@ -7006,76 +7006,100 @@ _mm512_maskz_srai_epi64(__mmask8 __U, __
}
#define _mm512_shuffle_f32x4(A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(imm), \
- (__v16sf)_mm512_undefined_ps(), \
- (__mmask16)-1); })
+ (__m512)__builtin_shufflevector((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(B), \
+ 0 + ((((imm) >> 0) & 0x3) * 4), \
+ 1 + ((((imm) >> 0) & 0x3) * 4), \
+ 2 + ((((imm) >> 0) & 0x3) * 4), \
+ 3 + ((((imm) >> 0) & 0x3) * 4), \
+ 0 + ((((imm) >> 2) & 0x3) * 4), \
+ 1 + ((((imm) >> 2) & 0x3) * 4), \
+ 2 + ((((imm) >> 2) & 0x3) * 4), \
+ 3 + ((((imm) >> 2) & 0x3) * 4), \
+ 16 + ((((imm) >> 4) & 0x3) * 4), \
+ 17 + ((((imm) >> 4) & 0x3) * 4), \
+ 18 + ((((imm) >> 4) & 0x3) * 4), \
+ 19 + ((((imm) >> 4) & 0x3) * 4), \
+ 16 + ((((imm) >> 6) & 0x3) * 4), \
+ 17 + ((((imm) >> 6) & 0x3) * 4), \
+ 18 + ((((imm) >> 6) & 0x3) * 4), \
+ 19 + ((((imm) >> 6) & 0x3) * 4)); })
#define _mm512_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(imm), \
- (__v16sf)(__m512)(W), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
+ (__v16sf)(__m512)(W)); })
#define _mm512_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
- (__m512)__builtin_ia32_shuf_f32x4_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(B), (int)(imm), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(U)); })
+ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
+ (__v16sf)_mm512_shuffle_f32x4((A), (B), (imm)), \
+ (__v16sf)_mm512_setzero_ps()); })
#define _mm512_shuffle_f64x2(A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(imm), \
- (__v8df)_mm512_undefined_pd(), \
- (__mmask8)-1); })
+ (__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(B), \
+ 0 + ((((imm) >> 0) & 0x3) * 2), \
+ 1 + ((((imm) >> 0) & 0x3) * 2), \
+ 0 + ((((imm) >> 2) & 0x3) * 2), \
+ 1 + ((((imm) >> 2) & 0x3) * 2), \
+ 8 + ((((imm) >> 4) & 0x3) * 2), \
+ 9 + ((((imm) >> 4) & 0x3) * 2), \
+ 8 + ((((imm) >> 6) & 0x3) * 2), \
+ 9 + ((((imm) >> 6) & 0x3) * 2)); })
#define _mm512_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(imm), \
- (__v8df)(__m512d)(W), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
+ (__v8df)(__m512d)(W)); })
#define _mm512_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
- (__m512d)__builtin_ia32_shuf_f64x2_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(B), (int)(imm), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
+ (__v8df)_mm512_shuffle_f64x2((A), (B), (imm)), \
+ (__v8df)_mm512_setzero_pd()); })
#define _mm512_shuffle_i32x4(A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(imm), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)-1); })
+ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ 0 + ((((imm) >> 0) & 0x3) * 2), \
+ 1 + ((((imm) >> 0) & 0x3) * 2), \
+ 0 + ((((imm) >> 2) & 0x3) * 2), \
+ 1 + ((((imm) >> 2) & 0x3) * 2), \
+ 8 + ((((imm) >> 4) & 0x3) * 2), \
+ 9 + ((((imm) >> 4) & 0x3) * 2), \
+ 8 + ((((imm) >> 6) & 0x3) * 2), \
+ 9 + ((((imm) >> 6) & 0x3) * 2)); })
#define _mm512_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(imm), \
- (__v16si)(__m512i)(W), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
+ (__v16si)(__m512i)(W)); })
#define _mm512_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_shuf_i32x4_mask((__v16si)(__m512i)(A), \
- (__v16si)(__m512i)(B), (int)(imm), \
- (__v16si)_mm512_setzero_si512(), \
- (__mmask16)(U)); })
+ (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
+ (__v16si)_mm512_shuffle_i32x4((A), (B), (imm)), \
+ (__v16si)_mm512_setzero_si512()); })
#define _mm512_shuffle_i64x2(A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(imm), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)-1); })
+ (__m512i)__builtin_shufflevector((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), \
+ 0 + ((((imm) >> 0) & 0x3) * 2), \
+ 1 + ((((imm) >> 0) & 0x3) * 2), \
+ 0 + ((((imm) >> 2) & 0x3) * 2), \
+ 1 + ((((imm) >> 2) & 0x3) * 2), \
+ 8 + ((((imm) >> 4) & 0x3) * 2), \
+ 9 + ((((imm) >> 4) & 0x3) * 2), \
+ 8 + ((((imm) >> 6) & 0x3) * 2), \
+ 9 + ((((imm) >> 6) & 0x3) * 2)); })
#define _mm512_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(imm), \
- (__v8di)(__m512i)(W), \
- (__mmask8)(U)); })
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
+ (__v8di)(__m512i)(W)); })
#define _mm512_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
- (__m512i)__builtin_ia32_shuf_i64x2_mask((__v8di)(__m512i)(A), \
- (__v8di)(__m512i)(B), (int)(imm), \
- (__v8di)_mm512_setzero_si512(), \
- (__mmask8)(U)); })
+ (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
+ (__v8di)_mm512_shuffle_i64x2((A), (B), (imm)), \
+ (__v8di)_mm512_setzero_si512()); })
#define _mm512_shuffle_pd(A, B, M) __extension__ ({ \
(__m512d)__builtin_shufflevector((__v8df)(__m512d)(A), \
Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=318025&r1=318024&r2=318025&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Mon Nov 13 01:15:31 2017
@@ -6600,85 +6600,81 @@ _mm256_maskz_srai_epi64(__mmask8 __U, __
#define _mm256_shuffle_f32x4(A, B, imm) __extension__ ({ \
- (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)-1); })
+ (__m256)__builtin_shufflevector((__v8sf)(__m256)(A), \
+ (__v8sf)(__m256)(B), \
+ 0 + ((((imm) >> 0) & 0x1) * 4), \
+ 1 + ((((imm) >> 0) & 0x1) * 4), \
+ 2 + ((((imm) >> 0) & 0x1) * 4), \
+ 3 + ((((imm) >> 0) & 0x1) * 4), \
+ 8 + ((((imm) >> 1) & 0x1) * 4), \
+ 9 + ((((imm) >> 1) & 0x1) * 4), \
+ 10 + ((((imm) >> 1) & 0x1) * 4), \
+ 11 + ((((imm) >> 1) & 0x1) * 4)); })
#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) __extension__ ({ \
- (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(imm), \
- (__v8sf)(__m256)(W), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
+ (__v8sf)(__m256)(W)); })
#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) __extension__ ({ \
- (__m256)__builtin_ia32_shuf_f32x4_256_mask((__v8sf)(__m256)(A), \
- (__v8sf)(__m256)(B), (int)(imm), \
- (__v8sf)_mm256_setzero_ps(), \
- (__mmask8)(U)); })
+ (__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
+ (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
+ (__v8sf)_mm256_setzero_ps()); })
#define _mm256_shuffle_f64x2(A, B, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), \
- (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)-1); })
+ (__m256d)__builtin_shufflevector((__v4df)(__m256d)(A), \
+ (__v4df)(__m256d)(B), \
+ 0 + ((((imm) >> 0) & 0x1) * 2), \
+ 1 + ((((imm) >> 0) & 0x1) * 2), \
+ 4 + ((((imm) >> 1) & 0x1) * 2), \
+ 5 + ((((imm) >> 1) & 0x1) * 2)); })
#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), \
- (int)(imm), \
- (__v4df)(__m256d)(W), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
+ (__v4df)(__m256d)(W)); })
#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) __extension__ ({ \
- (__m256d)__builtin_ia32_shuf_f64x2_256_mask((__v4df)(__m256d)(A), \
- (__v4df)(__m256d)(B), \
- (int)(imm), \
- (__v4df)_mm256_setzero_pd(), \
- (__mmask8)(U)); })
+ (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
+ (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
+ (__v4df)_mm256_setzero_pd()); })
#define _mm256_shuffle_i32x4(A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), \
- (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)-1); })
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ 0 + ((((imm) >> 0) & 0x1) * 2), \
+ 1 + ((((imm) >> 0) & 0x1) * 2), \
+ 4 + ((((imm) >> 1) & 0x1) * 2), \
+ 5 + ((((imm) >> 1) & 0x1) * 2)); })
#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), \
- (int)(imm), \
- (__v8si)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
+ (__v8si)(__m256i)(W)); })
#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_shuf_i32x4_256_mask((__v8si)(__m256i)(A), \
- (__v8si)(__m256i)(B), \
- (int)(imm), \
- (__v8si)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
+ (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
+ (__v8si)_mm256_setzero_si256()); })
#define _mm256_shuffle_i64x2(A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)-1); })
+ (__m256i)__builtin_shufflevector((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), \
+ 0 + ((((imm) >> 0) & 0x1) * 2), \
+ 1 + ((((imm) >> 0) & 0x1) * 2), \
+ 4 + ((((imm) >> 1) & 0x1) * 2), \
+ 5 + ((((imm) >> 1) & 0x1) * 2)); })
#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (int)(imm), \
- (__v4di)(__m256i)(W), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
+ (__v4di)(__m256i)(W)); })
+
#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) __extension__ ({ \
- (__m256i)__builtin_ia32_shuf_i64x2_256_mask((__v4di)(__m256i)(A), \
- (__v4di)(__m256i)(B), \
- (int)(imm), \
- (__v4di)_mm256_setzero_si256(), \
- (__mmask8)(U)); })
+ (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
+ (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
+ (__v4di)_mm256_setzero_si256()); })
#define _mm_mask_shuffle_pd(W, U, A, B, M) __extension__ ({ \
(__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
Modified: cfe/trunk/test/CodeGen/avx512f-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512f-builtins.c?rev=318025&r1=318024&r2=318025&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512f-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512f-builtins.c Mon Nov 13 01:15:31 2017
@@ -4484,73 +4484,81 @@ __m512i test_mm512_maskz_ternarylogic_ep
__m512 test_mm512_shuffle_f32x4(__m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_shuffle_f32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
return _mm512_shuffle_f32x4(__A, __B, 4);
}
__m512 test_mm512_mask_shuffle_f32x4(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_mask_shuffle_f32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_shuffle_f32x4(__W, __U, __A, __B, 4);
}
__m512 test_mm512_maskz_shuffle_f32x4(__mmask16 __U, __m512 __A, __m512 __B) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_f32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+ // CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_shuffle_f32x4(__U, __A, __B, 4);
}
__m512d test_mm512_shuffle_f64x2(__m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_shuffle_f64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_f64x2(__A, __B, 4);
}
__m512d test_mm512_mask_shuffle_f64x2(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_mask_shuffle_f64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_shuffle_f64x2(__W, __U, __A, __B, 4);
}
__m512d test_mm512_maskz_shuffle_f64x2(__mmask8 __U, __m512d __A, __m512d __B) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_f64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+ // CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_shuffle_f64x2(__U, __A, __B, 4);
}
__m512i test_mm512_shuffle_i32x4(__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_shuffle_i32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_i32x4(__A, __B, 4);
}
__m512i test_mm512_mask_shuffle_i32x4(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_shuffle_i32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shuffle_i32x4(__W, __U, __A, __B, 4);
}
__m512i test_mm512_maskz_shuffle_i32x4(__mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_i32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shuffle_i32x4(__U, __A, __B, 4);
}
__m512i test_mm512_shuffle_i64x2(__m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_shuffle_i64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
return _mm512_shuffle_i64x2(__A, __B, 4);
}
__m512i test_mm512_mask_shuffle_i64x2(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_mask_shuffle_i64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_shuffle_i64x2(__W, __U, __A, __B, 4);
}
__m512i test_mm512_maskz_shuffle_i64x2(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: @test_mm512_maskz_shuffle_i64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+ // CHECK: shufflevector <8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_shuffle_i64x2(__U, __A, __B, 4);
}
Modified: cfe/trunk/test/CodeGen/avx512vl-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx512vl-builtins.c?rev=318025&r1=318024&r2=318025&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx512vl-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx512vl-builtins.c Mon Nov 13 01:15:31 2017
@@ -5602,73 +5602,85 @@ __m256i test_mm256_maskz_ternarylogic_ep
}
__m256 test_mm256_shuffle_f32x4(__m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_shuffle_f32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
return _mm256_shuffle_f32x4(__A, __B, 3);
}
__m256 test_mm256_mask_shuffle_f32x4(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_mask_shuffle_f32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_shuffle_f32x4(__W, __U, __A, __B, 3);
}
__m256 test_mm256_maskz_shuffle_f32x4(__mmask8 __U, __m256 __A, __m256 __B) {
// CHECK-LABEL: @test_mm256_maskz_shuffle_f32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.f32x4
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_shuffle_f32x4(__U, __A, __B, 3);
}
__m256d test_mm256_shuffle_f64x2(__m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_shuffle_f64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
return _mm256_shuffle_f64x2(__A, __B, 3);
}
__m256d test_mm256_mask_shuffle_f64x2(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_mask_shuffle_f64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_shuffle_f64x2(__W, __U, __A, __B, 3);
}
__m256d test_mm256_maskz_shuffle_f64x2(__mmask8 __U, __m256d __A, __m256d __B) {
// CHECK-LABEL: @test_mm256_maskz_shuffle_f64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.f64x2
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_shuffle_f64x2(__U, __A, __B, 3);
}
__m256i test_mm256_shuffle_i32x4(__m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_shuffle_i32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
return _mm256_shuffle_i32x4(__A, __B, 3);
}
__m256i test_mm256_mask_shuffle_i32x4(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_shuffle_i32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_mask_shuffle_i32x4(__W, __U, __A, __B, 3);
}
__m256i test_mm256_maskz_shuffle_i32x4(__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_shuffle_i32x4
- // CHECK: @llvm.x86.avx512.mask.shuf.i32x4
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
return _mm256_maskz_shuffle_i32x4(__U, __A, __B, 3);
}
__m256i test_mm256_shuffle_i64x2(__m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_shuffle_i64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
return _mm256_shuffle_i64x2(__A, __B, 3);
}
__m256i test_mm256_mask_shuffle_i64x2(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_mask_shuffle_i64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_mask_shuffle_i64x2(__W, __U, __A, __B, 3);
}
__m256i test_mm256_maskz_shuffle_i64x2(__mmask8 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: @test_mm256_maskz_shuffle_i64x2
- // CHECK: @llvm.x86.avx512.mask.shuf.i64x2
+ // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
+ // CHECK: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
return _mm256_maskz_shuffle_i64x2(__U, __A, __B, 3);
}
More information about the cfe-commits
mailing list