[llvm] r271245 - [AVX512] Remove masked store intrinsics. Clang now emits generic masked store intrinsics instead.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon May 30 18:50:02 PDT 2016


Author: ctopper
Date: Mon May 30 20:50:02 2016
New Revision: 271245

URL: http://llvm.org/viewvc/llvm-project?rev=271245&view=rev
Log:
[AVX512] Remove masked store intrinsics. Clang now emits generic masked store intrinsics instead.

The intrinsics will be autoupgraded to the same generic masked stores.

Added:
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Mon May 30 20:50:02 2016
@@ -2098,58 +2098,6 @@ let TargetPrefix = "x86" in {  // All in
         Intrinsic<[], [llvm_ptr_ty,
                   llvm_v8i32_ty, llvm_v8f32_ty], [IntrArgMemOnly]>;
 
-  def int_x86_avx512_mask_storeu_ps_128 :
-        GCCBuiltin<"__builtin_ia32_storeups128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_ps_256 :
-        GCCBuiltin<"__builtin_ia32_storeups256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_ps_512 :
-        GCCBuiltin<"__builtin_ia32_storeups512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_storeu_pd_128 :
-        GCCBuiltin<"__builtin_ia32_storeupd128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_pd_256 :
-        GCCBuiltin<"__builtin_ia32_storeupd256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_pd_512 :
-        GCCBuiltin<"__builtin_ia32_storeupd512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_store_ps_128 :
-        GCCBuiltin<"__builtin_ia32_storeaps128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_ps_256 :
-        GCCBuiltin<"__builtin_ia32_storeaps256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_ps_512 :
-        GCCBuiltin<"__builtin_ia32_storeaps512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_store_pd_128 :
-        GCCBuiltin<"__builtin_ia32_storeapd128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_pd_256 :
-        GCCBuiltin<"__builtin_ia32_storeapd256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_pd_512 :
-        GCCBuiltin<"__builtin_ia32_storeapd512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-
   def int_x86_avx512_mask_store_ss :
         GCCBuiltin<"__builtin_ia32_storess_mask">,
         Intrinsic<[], [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty],
@@ -3131,84 +3079,6 @@ let TargetPrefix = "x86" in {  // All in
         GCCBuiltin<"__builtin_ia32_maskstoreq256">,
         Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_v4i64_ty],
                   [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_storeu_b_128 :
-        GCCBuiltin<"__builtin_ia32_storedquqi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i8_ty, llvm_i16_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_b_256 :
-        GCCBuiltin<"__builtin_ia32_storedquqi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v32i8_ty, llvm_i32_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_b_512 :
-        GCCBuiltin<"__builtin_ia32_storedquqi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v64i8_ty, llvm_i64_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_storeu_w_128 :
-        GCCBuiltin<"__builtin_ia32_storedquhi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_w_256 :
-        GCCBuiltin<"__builtin_ia32_storedquhi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_w_512 :
-         GCCBuiltin<"__builtin_ia32_storedquhi512_mask">,
-         Intrinsic<[], [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
-                   [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_storeu_d_128 :
-        GCCBuiltin<"__builtin_ia32_storedqusi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_d_256 :
-        GCCBuiltin<"__builtin_ia32_storedqusi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_d_512 :
-        GCCBuiltin<"__builtin_ia32_storedqusi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_storeu_q_128 :
-        GCCBuiltin<"__builtin_ia32_storedqudi128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_q_256 :
-        GCCBuiltin<"__builtin_ia32_storedqudi256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_storeu_q_512 :
-        GCCBuiltin<"__builtin_ia32_storedqudi512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_store_d_128 :
-        GCCBuiltin<"__builtin_ia32_movdqa32store128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_d_256 :
-        GCCBuiltin<"__builtin_ia32_movdqa32store256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_d_512 :
-        GCCBuiltin<"__builtin_ia32_movdqa32store512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty],
-                  [IntrArgMemOnly]>;
-
-  def int_x86_avx512_mask_store_q_128 :
-        GCCBuiltin<"__builtin_ia32_movdqa64store128_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_q_256 :
-        GCCBuiltin<"__builtin_ia32_movdqa64store256_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
-  def int_x86_avx512_mask_store_q_512 :
-        GCCBuiltin<"__builtin_ia32_movdqa64store512_mask">,
-        Intrinsic<[], [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty],
-                  [IntrArgMemOnly]>;
 }
 
 // Variable bit shift ops

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Mon May 30 20:50:02 2016
@@ -194,6 +194,16 @@ static bool UpgradeIntrinsicFunction1(Fu
         Name.startswith("x86.sse.storeu.") ||
         Name.startswith("x86.sse2.storeu.") ||
         Name.startswith("x86.avx.storeu.") ||
+        Name.startswith("x86.avx512.mask.storeu.p") ||
+        Name.startswith("x86.avx512.mask.storeu.b.") ||
+        Name.startswith("x86.avx512.mask.storeu.w.") ||
+        Name.startswith("x86.avx512.mask.storeu.d.") ||
+        Name.startswith("x86.avx512.mask.storeu.q.") ||
+        Name.startswith("x86.avx512.mask.store.p") ||
+        Name.startswith("x86.avx512.mask.store.b.") ||
+        Name.startswith("x86.avx512.mask.store.w.") ||
+        Name.startswith("x86.avx512.mask.store.d.") ||
+        Name.startswith("x86.avx512.mask.store.q.") ||
         Name == "x86.sse42.crc32.64.8" ||
         Name.startswith("x86.avx.vbroadcast.s") ||
         Name.startswith("x86.sse2.psll.dq") ||
@@ -358,6 +368,40 @@ static Value *UpgradeX86PSRLDQIntrinsics
   return Builder.CreateBitCast(Res, ResultTy, "cast");
 }
 
+static Value *UpgradeMaskedStore(IRBuilder<> &Builder, LLVMContext &C,
+                                 Value *Ptr, Value *Data, Value *Mask,
+                                 bool Aligned) {
+  // Cast the pointer to the right type.
+  Ptr = Builder.CreateBitCast(Ptr,
+                              llvm::PointerType::getUnqual(Data->getType()));
+  unsigned Align =
+    Aligned ? cast<VectorType>(Data->getType())->getBitWidth() / 8 : 1;
+
+  // If the mask is all ones just emit a regular store.
+  if (const auto *C = dyn_cast<Constant>(Mask))
+    if (C->isAllOnesValue())
+      return Builder.CreateAlignedStore(Data, Ptr, Align);
+
+  // Convert the mask from an integer type to a vector of i1.
+  unsigned NumElts = Data->getType()->getVectorNumElements();
+  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
+                             cast<IntegerType>(Mask->getType())->getBitWidth());
+  Mask = Builder.CreateBitCast(Mask, MaskTy);
+
+  // If we have less than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
+  if (NumElts < 8) {
+    int Indices[4];
+    for (unsigned i = 0; i != NumElts; ++i)
+      Indices[i] = i;
+    Mask = Builder.CreateShuffleVector(Mask, Mask,
+                                           makeArrayRef(Indices, NumElts),
+                                           "extract");
+  }
+
+  return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
+}
+
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
 // upgraded intrinsic. All argument and return casting must be provided in
 // order to seamlessly integrate with existing context.
@@ -458,6 +502,28 @@ void llvm::UpgradeIntrinsicCall(CallInst
 
       // Remove intrinsic.
       CI->eraseFromParent();
+      return;
+    } else if (Name.startswith("llvm.x86.avx512.mask.storeu.p") ||
+               Name.startswith("llvm.x86.avx512.mask.storeu.b.") ||
+               Name.startswith("llvm.x86.avx512.mask.storeu.w.") ||
+               Name.startswith("llvm.x86.avx512.mask.storeu.d.") ||
+               Name.startswith("llvm.x86.avx512.mask.storeu.q.")) {
+      UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(2), /*Aligned*/false);
+
+      // Remove intrinsic.
+      CI->eraseFromParent();
+      return;
+    } else if (Name.startswith("llvm.x86.avx512.mask.store.p") ||
+               Name.startswith("llvm.x86.avx512.mask.store.b.") ||
+               Name.startswith("llvm.x86.avx512.mask.store.w.") ||
+               Name.startswith("llvm.x86.avx512.mask.store.d.") ||
+               Name.startswith("llvm.x86.avx512.mask.store.q.")) {
+      UpgradeMaskedStore(Builder, C, CI->getArgOperand(0), CI->getArgOperand(1),
+                         CI->getArgOperand(2), /*Aligned*/true);
+
+      // Remove intrinsic.
+      CI->eraseFromParent();
       return;
     } else if (Name.startswith("llvm.x86.xop.vpcom")) {
       Intrinsic::ID intID;

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Mon May 30 20:50:02 2016
@@ -214,36 +214,6 @@ static const IntrinsicData IntrinsicsWit
                      X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_d_128, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_d_256, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_d_512, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_pd_128, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_pd_256, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_pd_512, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_ps_128, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_ps_256, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_ps_512, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_q_128, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_q_256, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_store_q_512, STOREA, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_b_128, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_b_256, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_b_512, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_d_128, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_d_256, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_d_512, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_pd_128, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_pd_256, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_pd_512, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_ps_128, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_ps_256, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_ps_512, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_q_128, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_q_256, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_q_512, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_w_128, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_w_256, STOREU, ISD::DELETED_NODE, 0),
-  X86_INTRINSIC_DATA(avx512_mask_storeu_w_512, STOREU, ISD::DELETED_NODE, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
   X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),

Added: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=271245&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Mon May 30 20:50:02 2016
@@ -0,0 +1,114 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+
+define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
+; CHECK-LABEL: test_store1:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovups %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovups %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
+  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
+
+define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
+; CHECK-LABEL: test_store2:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovupd %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
+  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
+
+define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
+; CHECK-LABEL: test_mask_store_aligned_ps:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovaps %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovaps %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
+  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
+
+define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
+; CHECK-LABEL: test_mask_store_aligned_pd:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovapd %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovapd %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
+  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovdqu64 %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovdqu32 %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)
+
+define void at test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_q_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)
+
+define void at test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_d_512:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1}
+; CHECK-NEXT:    vmovdqa32 %zmm0, (%rsi)
+; CHECK-NEXT:    retq
+  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon May 30 20:50:02 2016
@@ -972,62 +972,6 @@ define i16 @test_vptestmd(<16 x i32> %a0
 }
 declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
 
-define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
-; CHECK-LABEL: test_store1:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovups %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovups %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
-  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )
-
-define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
-; CHECK-LABEL: test_store2:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovupd %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
-  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)
-
-define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
-; CHECK-LABEL: test_mask_store_aligned_ps:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovaps %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovaps %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
-  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )
-
-define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
-; CHECK-LABEL: test_mask_store_aligned_pd:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovapd %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovapd %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
-  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
-
 define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
 ; CHECK-LABEL: test_mask_load_aligned_ps:
 ; CHECK:       ## BB#0:
@@ -1044,62 +988,6 @@ define <16 x float> @test_mask_load_alig
   ret <16 x float> %res4
 }
 
-declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovdqu64 %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)
-
-define void at test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovdqu32 %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
-  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)
-
-define void at test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_q_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovdqa64 %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)
-
-define void at test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_d_512:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1
-; CHECK-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1}
-; CHECK-NEXT:    vmovdqa32 %zmm0, (%rsi)
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
-  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
-  ret void
-}
-
 declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
 
 define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {

Added: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=271245&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Mon May 30 20:50:02 2016
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
+
+declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
+
+define void at test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    kmovq %rdx, %k1
+; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rsi)
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vmovdqu8 %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT:    vmovdqu8 %zmm0, (%eax)
+; AVX512F-32-NEXT:    retl
+  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
+  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
+
+define void at test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512:
+; AVX512BW:       ## BB#0:
+; AVX512BW-NEXT:    kmovd %edx, %k1
+; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rdi) {%k1}
+; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rsi)
+; AVX512BW-NEXT:    retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    vmovdqu16 %zmm0, (%ecx) {%k1}
+; AVX512F-32-NEXT:    vmovdqu16 %zmm0, (%eax)
+; AVX512F-32-NEXT:    retl
+  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
+  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Mon May 30 20:50:02 2016
@@ -3347,54 +3347,6 @@ define <32 x i16>@test_int_x86_avx512_ma
   ret <32 x i16> %res4
 }
 
-declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
-
-define void at test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
-; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_b_512:
-; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    kmovq %rdx, %k1
-; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rdi) {%k1}
-; AVX512BW-NEXT:    vmovdqu8 %zmm0, (%rsi)
-; AVX512BW-NEXT:    retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_b_512:
-; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
-; AVX512F-32-NEXT:    vmovdqu8 %zmm0, (%ecx) {%k1}
-; AVX512F-32-NEXT:    vmovdqu8 %zmm0, (%eax)
-; AVX512F-32-NEXT:    retl
-  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
-  call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
-
-define void at test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
-; AVX512BW-LABEL: test_int_x86_avx512_mask_storeu_w_512:
-; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    kmovd %edx, %k1
-; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rdi) {%k1}
-; AVX512BW-NEXT:    vmovdqu16 %zmm0, (%rsi)
-; AVX512BW-NEXT:    retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_mask_storeu_w_512:
-; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    vmovdqu16 %zmm0, (%ecx) {%k1}
-; AVX512F-32-NEXT:    vmovdqu16 %zmm0, (%eax)
-; AVX512F-32-NEXT:    retl
-  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
-  call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
-  ret void
-}
-
 declare <32 x i16> @llvm.x86.avx512.mask.movu.w.512(<32 x i16>, <32 x i16>, i32)
 
 define <32 x i16>@test_int_x86_avx512_mask_movu_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {

Added: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll?rev=271245&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll Mon May 30 20:50:02 2016
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
+declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16)
+
+define void at test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu8 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu8 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32)
+
+define void at test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
+; CHECK-NEXT:    vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu8 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2)
+  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu16 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu16 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16)
+
+define void at test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu16 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2)
+  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1)
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Mon May 30 20:50:02 2016
@@ -6464,62 +6464,6 @@ define <16 x i16>@test_int_x86_avx512_ma
   ret <16 x i16> %res4
 }
 
-declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16)
-
-define void at test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu8 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu8 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x08,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2)
-  call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32)
-
-define void at test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_b_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
-; CHECK-NEXT:    vmovdqu8 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu8 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7f,0x28,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2)
-  call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu16 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu16 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x08,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16)
-
-define void at test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_w_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu16 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu16 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xff,0x28,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2)
-  call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1)
-  ret void
-}
-
 declare <8 x i16> @llvm.x86.avx512.mask.movu.w.128(<8 x i16>, <8 x i16>, i8)
 
 define <8 x i16>@test_int_x86_avx512_mask_movu_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {

Added: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=271245&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (added)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Mon May 30 20:50:02 2016
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+
+declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)
+
+define void at test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
+; CHECK-NEXT:    vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)
+
+define void at test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
+; CHECK-NEXT:    vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
+; CHECK-NEXT:    vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
+; CHECK-NEXT:    vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)
+
+define void at test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
+; CHECK-NEXT:    vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+    call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
+    call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
+    ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)
+
+define void at test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
+; CHECK-NEXT:    vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+    call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
+    call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
+    ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
+; CHECK-NEXT:    vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+    call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
+    call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
+    ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
+; CHECK-NEXT:    vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+    call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
+    call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
+    ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)
+
+define void at test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
+; CHECK-NEXT:    vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)
+
+define void at test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
+; CHECK-NEXT:    vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)
+
+define void at test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
+; CHECK-NEXT:    vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)
+
+define void at test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
+; CHECK-NEXT:    vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
+  ret void
+}
+
+declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)
+
+define void at test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256:
+; CHECK:       ## BB#0:
+; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
+; CHECK-NEXT:    vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06]
+; CHECK-NEXT:    retq ## encoding: [0xc3]
+  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
+  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=271245&r1=271244&r2=271245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon May 30 20:50:02 2016
@@ -9411,230 +9411,6 @@ define <8 x i32>@test_int_x86_avx512_mas
     ret <8 x i32> %res2
 }
 
-declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)
-
-define void at test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
-; CHECK-NEXT:    vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)
-
-define void at test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
-; CHECK-NEXT:    vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
-; CHECK-NEXT:    vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
-; CHECK-NEXT:    vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)
-
-define void at test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
-; CHECK-NEXT:    vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-    call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
-    call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
-    ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)
-
-define void at test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
-; CHECK-NEXT:    vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-    call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
-    call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
-    ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
-; CHECK-NEXT:    vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-    call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
-    call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
-    ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
-; CHECK-NEXT:    vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-    call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
-    call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
-    ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)
-
-define void at test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
-; CHECK-NEXT:    vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)
-
-define void at test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
-; CHECK-NEXT:    vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)
-
-define void at test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
-; CHECK-NEXT:    vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)
-
-define void at test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
-; CHECK-NEXT:    vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
-  ret void
-}
-
-declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)
-
-define void at test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256:
-; CHECK:       ## BB#0:
-; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
-; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
-; CHECK-NEXT:    vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06]
-; CHECK-NEXT:    retq ## encoding: [0xc3]
-  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
-  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
-  ret void
-}
-
 declare <2 x double> @llvm.x86.avx512.mask.fixupimm.pd.128(<2 x double>, <2 x double>, <2 x i64>, i32, i8)
 
 define <2 x double>@test_int_x86_avx512_mask_fixupimm_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) {




More information about the llvm-commits mailing list