r313011 - [X86] Lower _mm[256|512]_[mask[z]]_avg_epu[8|16] intrinsics to native llvm IR
Galina Kistanova via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 12 11:16:29 PDT 2017
Hello Yael,
It looks like this commit broke one of our builders:
http://lab.llvm.org:8011/builders/llvm-clang-lld-x86_64-scei-ps4-ubuntu-fast/builds/17121
. . .
Failing Tests (1):
Clang :: Modules/builtins.m
Could you please take a look?
Thanks
Galina
On Tue, Sep 12, 2017 at 12:46 AM, Yael Tsafrir via cfe-commits <
cfe-commits at lists.llvm.org> wrote:
> Author: ytsafrir
> Date: Tue Sep 12 00:46:32 2017
> New Revision: 313011
>
> URL: http://llvm.org/viewvc/llvm-project?rev=313011&view=rev
> Log:
> [X86] Lower _mm[256|512]_[mask[z]]_avg_epu[8|16] intrinsics to native
> llvm IR
>
> Differential Revision: https://reviews.llvm.org/D37562
>
> Modified:
> cfe/trunk/include/clang/Basic/BuiltinsX86.def
> cfe/trunk/lib/Headers/avx2intrin.h
> cfe/trunk/lib/Headers/avx512bwintrin.h
> cfe/trunk/lib/Headers/emmintrin.h
> cfe/trunk/test/CodeGen/avx2-builtins.c
> cfe/trunk/test/CodeGen/avx512bw-builtins.c
> cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
> cfe/trunk/test/CodeGen/builtins-x86.c
> cfe/trunk/test/CodeGen/sse2-builtins.c
>
> Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/
> clang/Basic/BuiltinsX86.def?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
> +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Tue Sep 12 00:46:32 2017
> @@ -266,8 +266,6 @@ TARGET_BUILTIN(__builtin_ia32_paddusw128
> TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "", "sse2")
> TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "", "sse2")
> TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "", "sse2")
> -TARGET_BUILTIN(__builtin_ia32_pavgb128, "V16cV16cV16c", "", "sse2")
> -TARGET_BUILTIN(__builtin_ia32_pavgw128, "V8sV8sV8s", "", "sse2")
> TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "", "sse2")
> TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "", "sse2")
> TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "", "sse2")
> @@ -522,8 +520,6 @@ TARGET_BUILTIN(__builtin_ia32_paddusw256
> TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "", "avx2")
> TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "", "avx2")
> TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "", "avx2")
> -TARGET_BUILTIN(__builtin_ia32_pavgb256, "V32cV32cV32c", "", "avx2")
> -TARGET_BUILTIN(__builtin_ia32_pavgw256, "V16sV16sV16s", "", "avx2")
> TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "",
> "avx2")
> TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "", "avx2")
> TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "", "avx2")
> @@ -1075,8 +1071,6 @@ TARGET_BUILTIN(__builtin_ia32_paddsb512_
> TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "",
> "avx512bw")
> TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi",
> "", "avx512bw")
> TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "",
> "avx512bw")
> -TARGET_BUILTIN(__builtin_ia32_pavgb512_mask, "V64cV64cV64cV64cULLi", "",
> "avx512bw")
> -TARGET_BUILTIN(__builtin_ia32_pavgw512_mask, "V32sV32sV32sV32sUi", "",
> "avx512bw")
> TARGET_BUILTIN(__builtin_ia32_pmaxsb512_mask, "V64cV64cV64cV64cULLi",
> "", "avx512bw")
> TARGET_BUILTIN(__builtin_ia32_pmaxsw512_mask, "V32sV32sV32sV32sUi", "",
> "avx512bw")
> TARGET_BUILTIN(__builtin_ia32_pmaxub512_mask, "V64cV64cV64cV64cULLi",
> "", "avx512bw")
>
> Modified: cfe/trunk/lib/Headers/avx2intrin.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/
> avx2intrin.h?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/lib/Headers/avx2intrin.h (original)
> +++ cfe/trunk/lib/Headers/avx2intrin.h Tue Sep 12 00:46:32 2017
> @@ -145,13 +145,21 @@ _mm256_andnot_si256(__m256i __a, __m256i
> static __inline__ __m256i __DEFAULT_FN_ATTRS
> _mm256_avg_epu8(__m256i __a, __m256i __b)
> {
> - return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
> + typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
> + return (__m256i)__builtin_convertvector(
> + ((__builtin_convertvector((__v32qu)__a, __v32hu) +
> + __builtin_convertvector((__v32qu)__b, __v32hu)) + 1)
> + >> 1, __v32qu);
> }
>
> static __inline__ __m256i __DEFAULT_FN_ATTRS
> _mm256_avg_epu16(__m256i __a, __m256i __b)
> {
> - return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
> + typedef unsigned int __v16su __attribute__((__vector_size__(64)));
> + return (__m256i)__builtin_convertvector(
> + ((__builtin_convertvector((__v16hu)__a, __v16su) +
> + __builtin_convertvector((__v16hu)__b, __v16su)) + 1)
> + >> 1, __v16hu);
> }
>
> static __inline__ __m256i __DEFAULT_FN_ATTRS
>
> Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/
> avx512bwintrin.h?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
> +++ cfe/trunk/lib/Headers/avx512bwintrin.h Tue Sep 12 00:46:32 2017
> @@ -706,57 +706,55 @@ _mm512_maskz_adds_epu16 (__mmask32 __U,
> static __inline__ __m512i __DEFAULT_FN_ATTRS
> _mm512_avg_epu8 (__m512i __A, __m512i __B)
> {
> - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
> - (__v64qi) __B,
> - (__v64qi) _mm512_setzero_qi(),
> - (__mmask64) -1);
> + typedef unsigned short __v64hu __attribute__((__vector_size__(128)));
> + return (__m512i)__builtin_convertvector(
> + ((__builtin_convertvector((__v64qu) __A, __v64hu) +
> + __builtin_convertvector((__v64qu) __B, __v64hu)) + 1)
> + >> 1, __v64qu);
> }
>
> static __inline__ __m512i __DEFAULT_FN_ATTRS
> _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
> __m512i __B)
> {
> - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
> - (__v64qi) __B,
> - (__v64qi) __W,
> - (__mmask64) __U);
> + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
> + (__v64qi)_mm512_avg_epu8(__A, __B),
> + (__v64qi)__W);
> }
>
> static __inline__ __m512i __DEFAULT_FN_ATTRS
> _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
> {
> - return (__m512i) __builtin_ia32_pavgb512_mask ((__v64qi) __A,
> - (__v64qi) __B,
> - (__v64qi) _mm512_setzero_qi(),
> - (__mmask64) __U);
> + return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
> + (__v64qi)_mm512_avg_epu8(__A, __B),
> + (__v64qi)_mm512_setzero_qi());
> }
>
> static __inline__ __m512i __DEFAULT_FN_ATTRS
> _mm512_avg_epu16 (__m512i __A, __m512i __B)
> {
> - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
> - (__v32hi) __B,
> - (__v32hi) _mm512_setzero_hi(),
> - (__mmask32) -1);
> + typedef unsigned int __v32su __attribute__((__vector_size__(128)));
> + return (__m512i)__builtin_convertvector(
> + ((__builtin_convertvector((__v32hu) __A, __v32su) +
> + __builtin_convertvector((__v32hu) __B, __v32su)) + 1)
> + >> 1, __v32hu);
> }
>
> static __inline__ __m512i __DEFAULT_FN_ATTRS
> _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
> __m512i __B)
> {
> - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
> - (__v32hi) __B,
> - (__v32hi) __W,
> - (__mmask32) __U);
> + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
> + (__v32hi)_mm512_avg_epu16(__A, __B),
> + (__v32hi)__W);
> }
>
> static __inline__ __m512i __DEFAULT_FN_ATTRS
> _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
> {
> - return (__m512i) __builtin_ia32_pavgw512_mask ((__v32hi) __A,
> - (__v32hi) __B,
> - (__v32hi) _mm512_setzero_hi(),
> - (__mmask32) __U);
> + return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
> + (__v32hi)_mm512_avg_epu16(__A, __B),
> + (__v32hi) _mm512_setzero_hi());
> }
>
> static __inline__ __m512i __DEFAULT_FN_ATTRS
>
> Modified: cfe/trunk/lib/Headers/emmintrin.h
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/
> emmintrin.h?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/lib/Headers/emmintrin.h (original)
> +++ cfe/trunk/lib/Headers/emmintrin.h Tue Sep 12 00:46:32 2017
> @@ -2258,7 +2258,11 @@ _mm_adds_epu16(__m128i __a, __m128i __b)
> static __inline__ __m128i __DEFAULT_FN_ATTRS
> _mm_avg_epu8(__m128i __a, __m128i __b)
> {
> - return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
> + typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32)));
> + return (__m128i)__builtin_convertvector(
> + ((__builtin_convertvector((__v16qu)__a, __v16hu) +
> + __builtin_convertvector((__v16qu)__b, __v16hu)) + 1)
> + >> 1, __v16qu);
> }
>
> /// \brief Computes the rounded avarages of corresponding elements of two
> @@ -2278,7 +2282,11 @@ _mm_avg_epu8(__m128i __a, __m128i __b)
> static __inline__ __m128i __DEFAULT_FN_ATTRS
> _mm_avg_epu16(__m128i __a, __m128i __b)
> {
> - return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
> + typedef unsigned int __v8su __attribute__ ((__vector_size__ (32)));
> + return (__m128i)__builtin_convertvector(
> + ((__builtin_convertvector((__v8hu)__a, __v8su) +
> + __builtin_convertvector((__v8hu)__b, __v8su)) + 1)
> + >> 1, __v8hu);
> }
>
> /// \brief Multiplies the corresponding elements of two 128-bit signed [8
> x i16]
>
> Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/
> CodeGen/avx2-builtins.c?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx2-builtins.c Tue Sep 12 00:46:32 2017
> @@ -99,13 +99,25 @@ __m256i test_mm256_andnot_si256(__m256i
>
> __m256i test_mm256_avg_epu8(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_avg_epu8
> - // CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x
> i8> %{{.*}})
> + // CHECK-NOT: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}},
> <32 x i8> %{{.*}})
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> return _mm256_avg_epu8(a, b);
> }
>
> __m256i test_mm256_avg_epu16(__m256i a, __m256i b) {
> // CHECK-LABEL: test_mm256_avg_epu16
> - // CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16
> x i16> %{{.*}})
> + // CHECK-NOT: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}},
> <16 x i16> %{{.*}})
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: lshr <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> return _mm256_avg_epu16(a, b);
> }
>
>
> Modified: cfe/trunk/test/CodeGen/avx512bw-builtins.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/
> CodeGen/avx512bw-builtins.c?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/test/CodeGen/avx512bw-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512bw-builtins.c Tue Sep 12 00:46:32 2017
> @@ -638,32 +638,74 @@ __m512i test_mm512_maskz_adds_epu16(__mm
> }
> __m512i test_mm512_avg_epu8(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_avg_epu8
> - // CHECK: @llvm.x86.avx512.mask.pavg.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> return _mm512_avg_epu8(__A,__B);
> }
> __m512i test_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_avg_epu8
> - // CHECK: @llvm.x86.avx512.mask.pavg.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> return _mm512_mask_avg_epu8(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_avg_epu8
> - // CHECK: @llvm.x86.avx512.mask.pavg.b.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.b.512
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: zext <64 x i8> %{{.*}} to <64 x i16>
> + // CHECK: add <64 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <64 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <64 x i16> %{{.*}} to <64 x i8>
> + // CHECK: store <64 x i8> zeroinitializer
> + // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
> return _mm512_maskz_avg_epu8(__U,__A,__B);
> }
> __m512i test_mm512_avg_epu16(__m512i __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_avg_epu16
> - // CHECK: @llvm.x86.avx512.mask.pavg.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: lshr <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> return _mm512_avg_epu16(__A,__B);
> }
> __m512i test_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i
> __A, __m512i __B) {
> // CHECK-LABEL: @test_mm512_mask_avg_epu16
> - // CHECK: @llvm.x86.avx512.mask.pavg.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: lshr <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> return _mm512_mask_avg_epu16(__W,__U,__A,__B);
> }
> __m512i test_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i
> __B) {
> // CHECK-LABEL: @test_mm512_maskz_avg_epu16
> - // CHECK: @llvm.x86.avx512.mask.pavg.w.512
> + // CHECK-NOT: @llvm.x86.avx512.mask.pavg.w.512
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: zext <32 x i16> %{{.*}} to <32 x i32>
> + // CHECK: add <32 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: lshr <32 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
> 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: trunc <32 x i32> %{{.*}} to <32 x i16>
> + // CHECK: store <32 x i16> zeroinitializer
> + // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16>
> %{{.*}}
> return _mm512_maskz_avg_epu16(__U,__A,__B);
> }
> __m512i test_mm512_max_epi8(__m512i __A, __m512i __B) {
>
> Modified: cfe/trunk/test/CodeGen/avx512vlbw-builtins.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/
> CodeGen/avx512vlbw-builtins.c?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/test/CodeGen/avx512vlbw-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/avx512vlbw-builtins.c Tue Sep 12 00:46:32 2017
> @@ -1155,49 +1155,101 @@ __m256i test_mm256_maskz_adds_epu16(__mm
> }
> __m128i test_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_avg_epu8
> - // CHECK: @llvm.x86.sse2.pavg.b
> + // CHECK-NOT: @llvm.x86.sse2.pavg.b
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
> return _mm_mask_avg_epu8(__W,__U,__A,__B);
> }
> __m128i test_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
> // CHECK-LABEL: @test_mm_maskz_avg_epu8
> - // CHECK: @llvm.x86.sse2.pavg.b
> + // CHECK-NOT: @llvm.x86.sse2.pavg.b
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <16 x i16> %{{.*}} to <16 x i8>
> + // CHECK: store <2 x i64> zeroinitializer
> // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
> return _mm_maskz_avg_epu8(__U,__A,__B);
> }
> __m256i test_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i
> __A, __m256i __B) {
> // CHECK-LABEL: @test_mm256_mask_avg_epu8
> - // CHECK: @llvm.x86.avx2.pavg.b
> + // CHECK-NOT: @llvm.x86.avx2.pavg.b
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
> return _mm256_mask_avg_epu8(__W,__U,__A,__B);
> }
> __m256i test_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i
> __B) {
> // CHECK-LABEL: @test_mm256_maskz_avg_epu8
> - // CHECK: @llvm.x86.avx2.pavg.b
> + // CHECK-NOT: @llvm.x86.avx2.pavg.b
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: zext <32 x i8> %{{.*}} to <32 x i16>
> + // CHECK: add <32 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <32 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16
> 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: trunc <32 x i16> %{{.*}} to <32 x i8>
> + // CHECK: store <4 x i64> zeroinitializer
> // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
> return _mm256_maskz_avg_epu8(__U,__A,__B);
> }
> __m128i test_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A,
> __m128i __B) {
> // CHECK-LABEL: @test_mm_mask_avg_epu16
> - // CHECK: @llvm.x86.sse2.pavg.w
> + // CHECK-NOT: @llvm.x86.sse2.pavg.w
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1>
> + // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1>
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
> return _mm_mask_avg_epu16(__W,__U,__A,__B);
> }
> __m128i test_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
> // CHECK-LABEL: @test_mm_maskz_avg_epu16
> - // CHECK: @llvm.x86.sse2.pavg.w
> + // CHECK-NOT: @llvm.x86.sse2.pavg.w
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1>
> + // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1>
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> + // CHECK: store <2 x i64> zeroinitializer
> // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
> return _mm_maskz_avg_epu16(__U,__A,__B);
> }
> __m256i test_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i
> __A, __m256i __B) {
> // CHECK-LABEL: @test_mm256_mask_avg_epu16
> - // CHECK: @llvm.x86.avx2.pavg.w
> + // CHECK-NOT: @llvm.x86.avx2.pavg.w
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: lshr <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16>
> %{{.*}}
> return _mm256_mask_avg_epu16(__W,__U,__A,__B);
> }
> __m256i test_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i
> __B) {
> // CHECK-LABEL: @test_mm256_maskz_avg_epu16
> - // CHECK: @llvm.x86.avx2.pavg.w
> + // CHECK-NOT: @llvm.x86.avx2.pavg.w
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: zext <16 x i16> %{{.*}} to <16 x i32>
> + // CHECK: add <16 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: lshr <16 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
> + // CHECK: trunc <16 x i32> %{{.*}} to <16 x i16>
> + // CHECK: store <4 x i64> zeroinitializer
> // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16>
> %{{.*}}
> return _mm256_maskz_avg_epu16(__U,__A,__B);
> }
>
> Modified: cfe/trunk/test/CodeGen/builtins-x86.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/
> CodeGen/builtins-x86.c?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/test/CodeGen/builtins-x86.c (original)
> +++ cfe/trunk/test/CodeGen/builtins-x86.c Tue Sep 12 00:46:32 2017
> @@ -160,8 +160,6 @@ void f0() {
> tmp_V4s = __builtin_ia32_psubusw(tmp_V4s, tmp_V4s);
> tmp_V4s = __builtin_ia32_pmulhw(tmp_V4s, tmp_V4s);
> tmp_V4s = __builtin_ia32_pmulhuw(tmp_V4s, tmp_V4s);
> - tmp_V8c = __builtin_ia32_pavgb(tmp_V8c, tmp_V8c);
> - tmp_V4s = __builtin_ia32_pavgw(tmp_V4s, tmp_V4s);
> tmp_V8c = __builtin_ia32_pcmpeqb(tmp_V8c, tmp_V8c);
> tmp_V4s = __builtin_ia32_pcmpeqw(tmp_V4s, tmp_V4s);
> tmp_V2i = __builtin_ia32_pcmpeqd(tmp_V2i, tmp_V2i);
> @@ -201,8 +199,6 @@ void f0() {
> tmp_V16c = __builtin_ia32_psubusb128(tmp_V16c, tmp_V16c);
> tmp_V8s = __builtin_ia32_psubusw128(tmp_V8s, tmp_V8s);
> tmp_V8s = __builtin_ia32_pmulhw128(tmp_V8s, tmp_V8s);
> - tmp_V16c = __builtin_ia32_pavgb128(tmp_V16c, tmp_V16c);
> - tmp_V8s = __builtin_ia32_pavgw128(tmp_V8s, tmp_V8s);
> tmp_V16c = __builtin_ia32_pmaxub128(tmp_V16c, tmp_V16c);
> tmp_V8s = __builtin_ia32_pmaxsw128(tmp_V8s, tmp_V8s);
> tmp_V16c = __builtin_ia32_pminub128(tmp_V16c, tmp_V16c);
>
> Modified: cfe/trunk/test/CodeGen/sse2-builtins.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/
> CodeGen/sse2-builtins.c?rev=313011&r1=313010&r2=313011&view=diff
> ============================================================
> ==================
> --- cfe/trunk/test/CodeGen/sse2-builtins.c (original)
> +++ cfe/trunk/test/CodeGen/sse2-builtins.c Tue Sep 12 00:46:32 2017
> @@ -97,13 +97,25 @@ __m128i test_mm_andnot_si128(__m128i A,
>
> __m128i test_mm_avg_epu8(__m128i A, __m128i B) {
> // CHECK-LABEL: test_mm_avg_epu8
> - // CHECK: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}}, <16 x
> i8> %{{.*}})
> + // CHECK-NOT: call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %{{.*}},
> <16 x i8> %{{.*}})
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: zext <16 x i8> %{{.*}} to <16 x i16>
> + // CHECK: add <16 x i16> %{{.*}}, %{{.*}}
> + // CHECK: add <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK: lshr <16 x i16> %{{.*}}, <i16 1, i16 1, i16 1, i16 1, i16 1,
> i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
> + // CHECK:trunc <16 x i16> %{{.*}} to <16 x i8>
> return _mm_avg_epu8(A, B);
> }
>
> __m128i test_mm_avg_epu16(__m128i A, __m128i B) {
> // CHECK-LABEL: test_mm_avg_epu16
> - // CHECK: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}}, <8 x
> i16> %{{.*}})
> + // CHECK-NOT: call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %{{.*}},
> <8 x i16> %{{.*}})
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: zext <8 x i16> %{{.*}} to <8 x i32>
> + // CHECK: add <8 x i32> %{{.*}}, %{{.*}}
> + // CHECK: add <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1>
> + // CHECK: lshr <8 x i32> %{{.*}}, <i32 1, i32 1, i32 1, i32 1, i32 1,
> i32 1, i32 1, i32 1>
> + // CHECK: trunc <8 x i32> %{{.*}} to <8 x i16>
> return _mm_avg_epu16(A, B);
> }
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20170912/5bf5935a/attachment-0001.html>
More information about the cfe-commits
mailing list