r334331 - [X86] Add back some masked vector truncate builtins. Custom IRgen a a few others.

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Fri Jun 8 14:50:09 PDT 2018


Author: ctopper
Date: Fri Jun  8 14:50:08 2018
New Revision: 334331

URL: http://llvm.org/viewvc/llvm-project?rev=334331&view=rev
Log:
[X86] Add back some masked vector truncate builtins. Custom IRgen a a few others.

I'd like to make the select builtins require an avx512f, avx512bw, or avx512vl fature to match what is normally required to get masking. Truncate is special in that there are instructions with a 128/256-bit masked result even without avx512vl.

By using special buitlins we can emit a select without using the 128/256-bit select builtins.

Modified:
    cfe/trunk/include/clang/Basic/BuiltinsX86.def
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/lib/Headers/avx512bwintrin.h
    cfe/trunk/lib/Headers/avx512fintrin.h

Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334331&r1=334330&r2=334331&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Jun  8 14:50:08 2018
@@ -1298,6 +1298,7 @@ TARGET_BUILTIN(__builtin_ia32_vpshrdw512
 
 TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
 TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
@@ -1648,6 +1649,7 @@ TARGET_BUILTIN(__builtin_ia32_pmovdw512_
 TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f")

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334331&r1=334330&r2=334331&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun  8 14:50:08 2018
@@ -9309,6 +9309,35 @@ Value *CodeGenFunction::EmitX86BuiltinEx
                                        makeArrayRef(Indices, DstNumElts),
                                        "insert");
   }
+  case X86::BI__builtin_ia32_pmovqd512_mask:
+  case X86::BI__builtin_ia32_pmovwb512_mask: {
+    Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+    return EmitX86Select(*this, Ops[2], Res, Ops[1]);
+  }
+  case X86::BI__builtin_ia32_pmovdb512_mask:
+  case X86::BI__builtin_ia32_pmovdw512_mask:
+  case X86::BI__builtin_ia32_pmovqw512_mask: {
+    if (const auto *C = dyn_cast<Constant>(Ops[2]))
+      if (C->isAllOnesValue())
+        return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
+
+    Intrinsic::ID IID;
+    switch (BuiltinID) {
+    default: llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_pmovdb512_mask:
+      IID = Intrinsic::x86_avx512_mask_pmov_db_512;
+      break;
+    case X86::BI__builtin_ia32_pmovdw512_mask:
+      IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
+      break;
+    case X86::BI__builtin_ia32_pmovqw512_mask:
+      IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
+      break;
+    }
+
+    Function *Intr = CGM.getIntrinsic(IID);
+    return Builder.CreateCall(Intr, Ops);
+  }
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:

Modified: cfe/trunk/lib/Headers/avx512bwintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512bwintrin.h?rev=334331&r1=334330&r2=334331&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512bwintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512bwintrin.h Fri Jun  8 14:50:08 2018
@@ -1080,21 +1080,23 @@ _mm512_maskz_cvtusepi16_epi8 (__mmask32
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi16_epi8 (__m512i __A) {
-  return (__m256i)__builtin_convertvector((__v32hi)__A, __v32qi);
+  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+              (__v32qi) _mm256_undefined_si256(),
+              (__mmask32) -1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
-  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
-                                             (__v32qi)_mm512_cvtepi16_epi8(__A),
-                                             (__v32qi)__O);
+  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+              (__v32qi) __O,
+              __M);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
-  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
-                                             (__v32qi)_mm512_cvtepi16_epi8(__A),
-                                             (__v32qi)_mm256_setzero_si256());
+  return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
+              (__v32qi) _mm256_setzero_si256(),
+              __M);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS

Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=334331&r1=334330&r2=334331&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Fri Jun  8 14:50:08 2018
@@ -7402,7 +7402,9 @@ _mm512_mask_cvtusepi64_storeu_epi16 (voi
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_epi8 (__m512i __A)
 {
-  return (__m128i)__builtin_convertvector((__v16si)__A, __v16qi);
+  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+              (__v16qi) _mm_undefined_si128 (),
+              (__mmask16) -1);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
@@ -7429,7 +7431,9 @@ _mm512_mask_cvtepi32_storeu_epi8 (void *
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi32_epi16 (__m512i __A)
 {
-  return (__m256i)__builtin_convertvector((__v16si)__A, __v16hi);
+  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+              (__v16hi) _mm256_undefined_si256 (),
+              (__mmask16) -1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
@@ -7485,23 +7489,24 @@ _mm512_mask_cvtepi64_storeu_epi8 (void *
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_cvtepi64_epi32 (__m512i __A)
 {
-  return (__m256i)__builtin_convertvector((__v8di) __A, __v8si);
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+              (__v8si) _mm256_undefined_si256 (),
+              (__mmask8) -1);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
 {
-  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
-                                             (__v8si)_mm512_cvtepi64_epi32(__A),
-                                             (__v8si)__O);
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+              (__v8si) __O, __M);
 }
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
 {
-  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
-                                             (__v8si)_mm512_cvtepi64_epi32(__A),
-                                             (__v8si)_mm256_setzero_si256());
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+              (__v8si) _mm256_setzero_si256 (),
+              __M);
 }
 
 static __inline__ void __DEFAULT_FN_ATTRS
@@ -7513,7 +7518,9 @@ _mm512_mask_cvtepi64_storeu_epi32 (void*
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm512_cvtepi64_epi16 (__m512i __A)
 {
-  return (__m128i)__builtin_convertvector((__v8di)__A, __v8hi);
+  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+              (__v8hi) _mm_undefined_si128 (),
+              (__mmask8) -1);
 }
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS




More information about the cfe-commits mailing list