[llvm] r271363 - Revert r271362 "[AVX512] Remove masked load intrinsics. Clang now emits generic masked load intrinsics instead."

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue May 31 22:57:58 PDT 2016


Author: ctopper
Date: Wed Jun  1 00:57:55 2016
New Revision: 271363

URL: http://llvm.org/viewvc/llvm-project?rev=271363&view=rev
Log:
Revert r271362 "[AVX512] Remove masked load intrinsics. Clang now emits generic masked load intrinsics instead."

It looks like something still isn't quite right with that change. I also forgot to move the test cases to an autoupgrade test.

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=271363&r1=271362&r2=271363&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Wed Jun  1 00:57:55 2016
@@ -1935,6 +1935,46 @@ let TargetPrefix = "x86" in {  // All in
   def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
         Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
                   [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_loadu_ps_128 :
+          Intrinsic<[llvm_v4f32_ty],
+                    [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_ps_256 :
+          Intrinsic<[llvm_v8f32_ty],
+                    [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_ps_512 :
+          Intrinsic<[llvm_v16f32_ty],
+                    [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_loadu_pd_128 :
+          Intrinsic<[llvm_v2f64_ty],
+                    [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_pd_256 :
+          Intrinsic<[llvm_v4f64_ty],
+                    [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_pd_512 :
+          Intrinsic<[llvm_v8f64_ty],
+                    [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_load_ps_128 :
+          Intrinsic<[llvm_v4f32_ty],
+                    [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_ps_256 :
+          Intrinsic<[llvm_v8f32_ty],
+                    [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_ps_512 :
+          Intrinsic<[llvm_v16f32_ty],
+                    [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_load_pd_128 :
+          Intrinsic<[llvm_v2f64_ty],
+                    [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_pd_256 :
+          Intrinsic<[llvm_v4f64_ty],
+                    [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_pd_512 :
+          Intrinsic<[llvm_v8f64_ty],
+                    [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
 }
 
 // Conditional move ops
@@ -2931,6 +2971,66 @@ let TargetPrefix = "x86" in {  // All in
   def int_x86_avx2_maskload_q_256 : GCCBuiltin<"__builtin_ia32_maskloadq256">,
         Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty, llvm_v4i64_ty],
                   [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_loadu_b_128 :
+           Intrinsic<[llvm_v16i8_ty],
+                     [llvm_ptr_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_b_256 :
+           Intrinsic<[llvm_v32i8_ty],
+                     [llvm_ptr_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_b_512 :
+           Intrinsic<[llvm_v64i8_ty],
+                     [llvm_ptr_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_loadu_w_128 :
+           Intrinsic<[llvm_v8i16_ty],
+                     [llvm_ptr_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_w_256 :
+           Intrinsic<[llvm_v16i16_ty],
+                     [llvm_ptr_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_w_512 :
+           Intrinsic<[llvm_v32i16_ty],
+                     [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_loadu_d_128 :
+           Intrinsic<[llvm_v4i32_ty],
+                     [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_d_256 :
+           Intrinsic<[llvm_v8i32_ty],
+                     [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_d_512 :
+           Intrinsic<[llvm_v16i32_ty],
+                     [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_loadu_q_128 :
+           Intrinsic<[llvm_v2i64_ty],
+                     [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_q_256 :
+           Intrinsic<[llvm_v4i64_ty],
+                     [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_loadu_q_512 :
+           Intrinsic<[llvm_v8i64_ty],
+                     [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_load_d_128 :
+           Intrinsic<[llvm_v4i32_ty],
+                     [llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_d_256 :
+           Intrinsic<[llvm_v8i32_ty],
+                     [llvm_ptr_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_d_512 :
+           Intrinsic<[llvm_v16i32_ty],
+                     [llvm_ptr_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrReadMem, IntrArgMemOnly]>;
+
+  def int_x86_avx512_mask_load_q_128 :
+           Intrinsic<[llvm_v2i64_ty],
+                     [llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_q_256 :
+           Intrinsic<[llvm_v4i64_ty],
+                     [llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
+  def int_x86_avx512_mask_load_q_512 :
+           Intrinsic<[llvm_v8i64_ty],
+                     [llvm_ptr_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrReadMem, IntrArgMemOnly]>;
 }
 
 // Conditional store ops

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=271363&r1=271362&r2=271363&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Wed Jun  1 00:57:55 2016
@@ -204,16 +204,6 @@ static bool UpgradeIntrinsicFunction1(Fu
         Name.startswith("x86.avx512.mask.store.w.") ||
         Name.startswith("x86.avx512.mask.store.d.") ||
         Name.startswith("x86.avx512.mask.store.q.") ||
-        Name.startswith("x86.avx512.mask.loadu.p") ||
-        Name.startswith("x86.avx512.mask.loadu.b.") ||
-        Name.startswith("x86.avx512.mask.loadu.w.") ||
-        Name.startswith("x86.avx512.mask.loadu.d.") ||
-        Name.startswith("x86.avx512.mask.loadu.q.") ||
-        Name.startswith("x86.avx512.mask.load.p") ||
-        Name.startswith("x86.avx512.mask.load.b.") ||
-        Name.startswith("x86.avx512.mask.load.w.") ||
-        Name.startswith("x86.avx512.mask.load.d.") ||
-        Name.startswith("x86.avx512.mask.load.q.") ||
         Name == "x86.sse42.crc32.64.8" ||
         Name.startswith("x86.avx.vbroadcast.s") ||
         Name.startswith("x86.sse2.psll.dq") ||
@@ -405,47 +395,13 @@ static Value *UpgradeMaskedStore(IRBuild
     for (unsigned i = 0; i != NumElts; ++i)
       Indices[i] = i;
     Mask = Builder.CreateShuffleVector(Mask, Mask,
-                                       makeArrayRef(Indices, NumElts),
-                                       "extract");
+                                           makeArrayRef(Indices, NumElts),
+                                           "extract");
   }
 
   return Builder.CreateMaskedStore(Data, Ptr, Align, Mask);
 }
 
-static Value *UpgradeMaskedLoad(IRBuilder<> &Builder, LLVMContext &C,
-                                Value *Ptr, Value *Passthru, Value *Mask,
-                                bool Aligned) {
-  // Cast the pointer to the right type.
-  Ptr = Builder.CreateBitCast(Ptr,
-                             llvm::PointerType::getUnqual(Passthru->getType()));
-  unsigned Align =
-    Aligned ? cast<VectorType>(Passthru->getType())->getBitWidth() / 8 : 1;
-
-  // If the mask is all ones just emit a regular store.
-  if (const auto *C = dyn_cast<Constant>(Mask))
-    if (C->isAllOnesValue())
-      return Builder.CreateAlignedLoad(Ptr, Align);
-
-  // Convert the mask from an integer type to a vector of i1.
-  unsigned NumElts = Passthru->getType()->getVectorNumElements();
-  llvm::VectorType *MaskTy = llvm::VectorType::get(Builder.getInt1Ty(),
-                             cast<IntegerType>(Mask->getType())->getBitWidth());
-  Mask = Builder.CreateBitCast(Mask, MaskTy);
-
-  // If we have less than 8 elements, then the starting mask was an i8 and
-  // we need to extract down to the right number of elements.
-  if (NumElts < 8) {
-    int Indices[4];
-    for (unsigned i = 0; i != NumElts; ++i)
-      Indices[i] = i;
-    Mask = Builder.CreateShuffleVector(Mask, Mask,
-                                       makeArrayRef(Indices, NumElts),
-                                       "extract");
-  }
-
-  return Builder.CreateMaskedLoad(Ptr, Align, Mask, Passthru);
-}
-
 // UpgradeIntrinsicCall - Upgrade a call to an old intrinsic to be a call the
 // upgraded intrinsic. All argument and return casting must be provided in
 // order to seamlessly integrate with existing context.
@@ -569,22 +525,6 @@ void llvm::UpgradeIntrinsicCall(CallInst
       // Remove intrinsic.
       CI->eraseFromParent();
       return;
-    } else if (Name.startswith("llvm.x86.avx512.mask.loadu.p") ||
-               Name.startswith("llvm.x86.avx512.mask.loadu.b.") ||
-               Name.startswith("llvm.x86.avx512.mask.loadu.w.") ||
-               Name.startswith("llvm.x86.avx512.mask.loadu.d.") ||
-               Name.startswith("llvm.x86.avx512.mask.loadu.q.")) {
-      Rep = UpgradeMaskedLoad(Builder, C, CI->getArgOperand(0),
-                              CI->getArgOperand(1), CI->getArgOperand(2),
-                              /*Aligned*/false);
-    } else if (Name.startswith("llvm.x86.avx512.mask.load.p") ||
-               Name.startswith("llvm.x86.avx512.mask.load.b.") ||
-               Name.startswith("llvm.x86.avx512.mask.load.w.") ||
-               Name.startswith("llvm.x86.avx512.mask.load.d.") ||
-               Name.startswith("llvm.x86.avx512.mask.load.q.")) {
-      Rep = UpgradeMaskedStore(Builder, C, CI->getArgOperand(0),
-                               CI->getArgOperand(1),CI->getArgOperand(2),
-                               /*Aligned*/true);
     } else if (Name.startswith("llvm.x86.xop.vpcom")) {
       Intrinsic::ID intID;
       if (Name.endswith("ub"))

Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=271363&r1=271362&r2=271363&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Wed Jun  1 00:57:55 2016
@@ -148,6 +148,36 @@ static const IntrinsicData IntrinsicsWit
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
   X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
                      EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_d_128, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_d_256, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_d_512, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_pd_128, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_pd_256, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_pd_512, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_ps_128, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_ps_256, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_ps_512, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_q_128, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_q_256, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_load_q_512, LOADA, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_b_128, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_b_256, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_b_512, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_d_128, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_d_256, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_d_512, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_pd_128, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_pd_256, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_pd_512, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_ps_128, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_ps_256, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_ps_512, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_q_128, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_q_256, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_q_512, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_w_128, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_w_256, LOADU, ISD::DELETED_NODE, 0),
+  X86_INTRINSIC_DATA(avx512_mask_loadu_w_512, LOADU, ISD::DELETED_NODE, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
                      X86ISD::VTRUNC, 0),
   X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=271363&r1=271362&r2=271363&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Wed Jun  1 00:57:55 2016
@@ -3202,8 +3202,8 @@ declare <32 x i16> @llvm.x86.avx512.mask
 define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_w_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm0
 ; AVX512BW-NEXT:    kmovd %edx, %k1
+; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm0
 ; AVX512BW-NEXT:    vmovdqu16 (%rsi), %zmm0 {%k1}
 ; AVX512BW-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1} {z}
 ; AVX512BW-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
@@ -3212,9 +3212,9 @@ define <32 x i16>@test_int_x86_avx512_ma
 ; AVX512F-32-LABEL: test_int_x86_avx512_mask_loadu_w_512:
 ; AVX512F-32:       # BB#0:
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; AVX512F-32-NEXT:    vmovdqu16 (%ecx), %zmm0
-; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
 ; AVX512F-32-NEXT:    vmovdqu16 (%eax), %zmm0 {%k1}
 ; AVX512F-32-NEXT:    vmovdqu16 (%ecx), %zmm1 {%k1} {z}
 ; AVX512F-32-NEXT:    vpaddw %zmm1, %zmm0, %zmm0
@@ -3231,8 +3231,8 @@ declare <64 x i8> @llvm.x86.avx512.mask.
 define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
 ; AVX512BW-LABEL: test_int_x86_avx512_mask_loadu_b_512:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm0
 ; AVX512BW-NEXT:    kmovq %rdx, %k1
+; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm0
 ; AVX512BW-NEXT:    vmovdqu8 (%rsi), %zmm0 {%k1}
 ; AVX512BW-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1} {z}
 ; AVX512BW-NEXT:    vpaddb %zmm1, %zmm0, %zmm0
@@ -3243,7 +3243,9 @@ define <64 x i8>@test_int_x86_avx512_mas
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; AVX512F-32-NEXT:    vmovdqu8 (%ecx), %zmm0
-; AVX512F-32-NEXT:    kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
+; AVX512F-32-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT:    kunpckdq %k0, %k1, %k1
 ; AVX512F-32-NEXT:    vmovdqu8 (%eax), %zmm0 {%k1}
 ; AVX512F-32-NEXT:    vmovdqu8 (%ecx), %zmm1 {%k1} {z}
 ; AVX512F-32-NEXT:    vpaddb %zmm1, %zmm0, %zmm0

Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=271363&r1=271362&r2=271363&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Wed Jun  1 00:57:55 2016
@@ -6230,8 +6230,8 @@ declare <8 x i16> @llvm.x86.avx512.mask.
 define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_w_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqu16 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x6f,0x07]
 ; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu16 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xff,0x08,0x6f,0x07]
 ; CHECK-NEXT:    vmovdqu16 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06]
 ; CHECK-NEXT:    vmovdqu16 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f]
 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
@@ -6248,8 +6248,8 @@ declare <16 x i16> @llvm.x86.avx512.mask
 define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_w_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqu16 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x6f,0x07]
 ; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu16 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xff,0x28,0x6f,0x07]
 ; CHECK-NEXT:    vmovdqu16 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06]
 ; CHECK-NEXT:    vmovdqu16 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f]
 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
@@ -6266,8 +6266,8 @@ declare <16 x i8> @llvm.x86.avx512.mask.
 define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_b_128:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqu8 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x07]
 ; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
+; CHECK-NEXT:    vmovdqu8 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x6f,0x07]
 ; CHECK-NEXT:    vmovdqu8 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06]
 ; CHECK-NEXT:    vmovdqu8 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f]
 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
@@ -6284,8 +6284,8 @@ declare <32 x i8> @llvm.x86.avx512.mask.
 define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_loadu_b_256:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vmovdqu8 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x07]
 ; CHECK-NEXT:    kmovd %edx, %k1 ## encoding: [0xc5,0xfb,0x92,0xca]
+; CHECK-NEXT:    vmovdqu8 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7f,0x28,0x6f,0x07]
 ; CHECK-NEXT:    vmovdqu8 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06]
 ; CHECK-NEXT:    vmovdqu8 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f]
 ; CHECK-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfc,0xc1]




More information about the llvm-commits mailing list