[llvm] r236979 - AVX-512: Changed CC parameter in "cmp" intrinsic

Elena Demikhovsky elena.demikhovsky at intel.com
Mon May 11 02:03:14 PDT 2015


Author: delena
Date: Mon May 11 04:03:14 2015
New Revision: 236979

URL: http://llvm.org/viewvc/llvm-project?rev=236979&view=rev
Log:
AVX-512: Changed the type of the CC parameter of the "cmp" and "ucmp"
mask intrinsics from i8 to i32, according to the Intel spec.

Patch by Igor Breger (igor.breger at intel.com)

Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsX86.td
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll

Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=236979&r1=236978&r2=236979&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Mon May 11 04:03:14 2015
@@ -3917,29 +3917,29 @@ let TargetPrefix = "x86" in {
                   [IntrNoMem]>;
 
   def int_x86_avx512_mask_cmp_b_512: GCCBuiltin<"__builtin_ia32_cmpb512_mask">,
-        Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty,
                   llvm_i64_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_w_512: GCCBuiltin<"__builtin_ia32_cmpw512_mask">,
-        Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty,
                   llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_d_512: GCCBuiltin<"__builtin_ia32_cmpd512_mask">,
-        Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
                   llvm_i16_ty], [IntrNoMem ]>;
   def int_x86_avx512_mask_cmp_q_512: GCCBuiltin<"__builtin_ia32_cmpq512_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_ucmp_b_512: GCCBuiltin<"__builtin_ia32_ucmpb512_mask">,
-        Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i32_ty,
                   llvm_i64_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_w_512: GCCBuiltin<"__builtin_ia32_ucmpw512_mask">,
-        Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty,
                   llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_d_512: GCCBuiltin<"__builtin_ia32_ucmpd512_mask">,
-        Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
                   llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_q_512: GCCBuiltin<"__builtin_ia32_ucmpq512_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
 
   // 256-bit
@@ -3970,29 +3970,29 @@ let TargetPrefix = "x86" in {
                   [IntrNoMem]>;
 
   def int_x86_avx512_mask_cmp_b_256: GCCBuiltin<"__builtin_ia32_cmpb256_mask">,
-        Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty,
                   llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_w_256: GCCBuiltin<"__builtin_ia32_cmpw256_mask">,
-        Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty,
                   llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_d_256: GCCBuiltin<"__builtin_ia32_cmpd256_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_q_256: GCCBuiltin<"__builtin_ia32_cmpq256_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_ucmp_b_256: GCCBuiltin<"__builtin_ia32_ucmpb256_mask">,
-        Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty,
                   llvm_i32_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_w_256: GCCBuiltin<"__builtin_ia32_ucmpw256_mask">,
-        Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty, llvm_i32_ty,
                   llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_d_256: GCCBuiltin<"__builtin_ia32_ucmpd256_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_q_256: GCCBuiltin<"__builtin_ia32_ucmpq256_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
 
   // 128-bit
@@ -4023,29 +4023,29 @@ let TargetPrefix = "x86" in {
                   [IntrNoMem]>;
 
   def int_x86_avx512_mask_cmp_b_128: GCCBuiltin<"__builtin_ia32_cmpb128_mask">,
-        Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
                   llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_w_128: GCCBuiltin<"__builtin_ia32_cmpw128_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_d_128: GCCBuiltin<"__builtin_ia32_cmpd128_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_cmp_q_128: GCCBuiltin<"__builtin_ia32_cmpq128_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
 
   def int_x86_avx512_mask_ucmp_b_128: GCCBuiltin<"__builtin_ia32_ucmpb128_mask">,
-        Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty,
                   llvm_i16_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_w_128: GCCBuiltin<"__builtin_ia32_ucmpw128_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_d_128: GCCBuiltin<"__builtin_ia32_ucmpd128_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
   def int_x86_avx512_mask_ucmp_q_128: GCCBuiltin<"__builtin_ia32_ucmpq128_mask">,
-        Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty,
+        Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
                   llvm_i8_ty], [IntrNoMem]>;
 }
 

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=236979&r1=236978&r2=236979&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Mon May 11 04:03:14 2015
@@ -62,21 +62,6 @@ static bool UpgradeX86IntrinsicsWith8Bit
   return true;
 }
 
-// Upgrade the declarations of AVX-512 cmp intrinsic functions whose 8-bit
-// immediates have changed their type from i32 to i8.
-static bool UpgradeAVX512CmpIntrinsic(Function *F, Intrinsic::ID IID,
-                                      Function *&NewFn) {
-  // Check that the last argument is an i32.
-  Type *LastArgType = F->getFunctionType()->getParamType(2);
-  if (!LastArgType->isIntegerTy(32))
-    return false;
-
-  // Move this function aside and map down.
-  F->setName(F->getName() + ".old");
-  NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
-  return true;
-}
-
 static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
   assert(F && "Illegal to upgrade a non-existent Function.");
 
@@ -210,80 +195,6 @@ static bool UpgradeIntrinsicFunction1(Fu
     if (Name == "x86.avx2.mpsadbw")
       return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
                                               NewFn);
-    if (Name == "x86.avx512.mask.cmp.b.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.w.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.d.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.q.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.b.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.w.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.d.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_512,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.q.512")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_512,
-                                       NewFn);
-
-    if (Name == "x86.avx512.mask.cmp.b.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.w.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.d.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.q.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.b.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.w.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.d.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_256,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.q.256")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_256,
-                                       NewFn);
-
-    if (Name == "x86.avx512.mask.cmp.b.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_b_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.w.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_w_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.d.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_d_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.cmp.q.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_cmp_q_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.b.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_b_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.w.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_w_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.d.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_d_128,
-                                       NewFn);
-    if (Name == "x86.avx512.mask.ucmp.q.128")
-      return UpgradeAVX512CmpIntrinsic(F, Intrinsic::x86_avx512_mask_ucmp_q_128,
-                                       NewFn);
 
     // frcz.ss/sd may need to have an argument dropped
     if (Name.startswith("x86.xop.vfrcz.ss") && F->arg_size() == 2) {

Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=236979&r1=236978&r2=236979&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon May 11 04:03:14 2015
@@ -718,28 +718,28 @@ declare i8 @llvm.x86.avx512.mask.pcmpgt.
 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK_LABEL: test_cmp_d_512
 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 ##
-  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 -1)
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 ##
-  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 -1)
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 ##
-  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 -1)
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 ##
-  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 -1)
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 ##
-  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 -1)
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 ##
-  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 -1)
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 ##
-  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 -1)
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 ##
-  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 -1)
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
@@ -747,59 +747,59 @@ define <8 x i16> @test_cmp_d_512(<16 x i
 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
 ; CHECK_LABEL: test_mask_cmp_d_512
 ; CHECK: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 %mask)
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltd %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 %mask)
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpled %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 %mask)
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordd %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 %mask)
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpneqd %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 %mask)
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 %mask)
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnled %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 %mask)
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordd %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 %mask)
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
 
-declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i8, i16) nounwind readnone
+declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
 
 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
 ; CHECK_LABEL: test_ucmp_d_512
 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 ##
-  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 -1)
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 ##
-  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 -1)
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 ##
-  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 -1)
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 ##
-  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 -1)
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 ##
-  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 -1)
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 ##
-  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 -1)
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 ##
-  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 -1)
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 ##
-  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 -1)
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
@@ -807,59 +807,59 @@ define <8 x i16> @test_ucmp_d_512(<16 x
 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_d_512
 ; CHECK: vpcmpequd %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 0, i16 %mask)
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 1, i16 %mask)
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleud %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 2, i16 %mask)
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordud %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 3, i16 %mask)
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpnequd %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 4, i16 %mask)
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltud %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 5, i16 %mask)
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleud %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 6, i16 %mask)
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordud %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i8 7, i16 %mask)
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
 
-declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i8, i16) nounwind readnone
+declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
 
 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
 ; CHECK_LABEL: test_cmp_q_512
 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -867,59 +867,59 @@ define <8 x i8> @test_cmp_q_512(<8 x i64
 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 ; CHECK_LABEL: test_mask_cmp_q_512
 ; CHECK: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltq %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleq %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordq %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqq %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleq %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordq %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
 ; CHECK_LABEL: test_ucmp_q_512
 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -927,33 +927,33 @@ define <8 x i8> @test_ucmp_q_512(<8 x i6
 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_q_512
 ; CHECK: vpcmpequq %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuq %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduq %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequq %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuq %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuq %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduq %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
 
 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
 ; CHECK-LABEL: test_mask_vextractf32x4:

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=236979&r1=236978&r2=236979&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Mon May 11 04:03:14 2015
@@ -67,28 +67,28 @@ declare i32 @llvm.x86.avx512.mask.pcmpgt
 define <8 x i64> @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
 ; CHECK_LABEL: test_cmp_b_512
 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 ##
-  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1)
+  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
   %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 ##
-  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1)
+  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
   %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 ##
-  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1)
+  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
   %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 ##
-  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1)
+  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
   %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 ##
-  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1)
+  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
   %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 ##
-  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1)
+  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
   %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 ##
-  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1)
+  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
   %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 ##
-  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1)
+  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
   %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
   ret <8 x i64> %vec7
 }
@@ -96,59 +96,59 @@ define <8 x i64> @test_cmp_b_512(<64 x i
 define <8 x i64> @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
 ; CHECK_LABEL: test_mask_cmp_b_512
 ; CHECK: vpcmpeqb %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask)
+  %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
   %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
 ; CHECK: vpcmpltb %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask)
+  %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
   %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
 ; CHECK: vpcmpleb %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask)
+  %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
   %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
 ; CHECK: vpcmpunordb %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask)
+  %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
   %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
 ; CHECK: vpcmpneqb %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask)
+  %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
   %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
 ; CHECK: vpcmpnltb %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask)
+  %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
   %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
 ; CHECK: vpcmpnleb %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask)
+  %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
   %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
 ; CHECK: vpcmpordb %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask)
+  %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
   %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
   ret <8 x i64> %vec7
 }
 
-declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone
+declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
 
 define <8 x i64> @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
 ; CHECK_LABEL: test_ucmp_b_512
 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 ##
-  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 -1)
+  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
   %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 ##
-  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 -1)
+  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
   %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 ##
-  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 -1)
+  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
   %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 ##
-  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 -1)
+  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
   %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 ##
-  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 -1)
+  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
   %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 ##
-  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 -1)
+  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
   %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 ##
-  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 -1)
+  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
   %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 ##
-  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 -1)
+  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
   %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
   ret <8 x i64> %vec7
 }
@@ -156,59 +156,59 @@ define <8 x i64> @test_ucmp_b_512(<64 x
 define <8 x i64> @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_b_512
 ; CHECK: vpcmpequb %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 0, i64 %mask)
+  %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
   %vec0 = insertelement <8 x i64> undef, i64 %res0, i32 0
 ; CHECK: vpcmpltub %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 1, i64 %mask)
+  %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
   %vec1 = insertelement <8 x i64> %vec0, i64 %res1, i32 1
 ; CHECK: vpcmpleub %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 2, i64 %mask)
+  %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
   %vec2 = insertelement <8 x i64> %vec1, i64 %res2, i32 2
 ; CHECK: vpcmpunordub %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 3, i64 %mask)
+  %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
   %vec3 = insertelement <8 x i64> %vec2, i64 %res3, i32 3
 ; CHECK: vpcmpnequb %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 4, i64 %mask)
+  %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
   %vec4 = insertelement <8 x i64> %vec3, i64 %res4, i32 4
 ; CHECK: vpcmpnltub %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 5, i64 %mask)
+  %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
   %vec5 = insertelement <8 x i64> %vec4, i64 %res5, i32 5
 ; CHECK: vpcmpnleub %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 6, i64 %mask)
+  %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
   %vec6 = insertelement <8 x i64> %vec5, i64 %res6, i32 6
 ; CHECK: vpcmpordub %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i8 7, i64 %mask)
+  %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
   %vec7 = insertelement <8 x i64> %vec6, i64 %res7, i32 7
   ret <8 x i64> %vec7
 }
 
-declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i8, i64) nounwind readnone
+declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
 
 define <8 x i32> @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
 ; CHECK_LABEL: test_cmp_w_512
 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 ##
-  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1)
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 ##
-  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1)
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 ##
-  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1)
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 ##
-  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1)
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 ##
-  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1)
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 ##
-  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1)
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 ##
-  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1)
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 ##
-  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1)
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
@@ -216,59 +216,59 @@ define <8 x i32> @test_cmp_w_512(<32 x i
 define <8 x i32> @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
 ; CHECK_LABEL: test_mask_cmp_w_512
 ; CHECK: vpcmpeqw %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask)
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltw %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask)
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmplew %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask)
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunordw %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask)
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpneqw %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask)
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltw %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask)
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnlew %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask)
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmpordw %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask)
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
 
-declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
 
 define <8 x i32> @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
 ; CHECK_LABEL: test_ucmp_w_512
 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 ##
-  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 -1)
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 ##
-  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 -1)
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 ##
-  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 -1)
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 ##
-  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 -1)
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 ##
-  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 -1)
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 ##
-  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 -1)
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 ##
-  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 -1)
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 ##
-  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 -1)
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
@@ -276,33 +276,33 @@ define <8 x i32> @test_ucmp_w_512(<32 x
 define <8 x i32> @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_w_512
 ; CHECK: vpcmpequw %zmm1, %zmm0, %k0 {%k1} ##
-  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 0, i32 %mask)
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ##
-  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 1, i32 %mask)
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmpleuw %zmm1, %zmm0, %k0 {%k1} ##
-  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 2, i32 %mask)
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunorduw %zmm1, %zmm0, %k0 {%k1} ##
-  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 3, i32 %mask)
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpnequw %zmm1, %zmm0, %k0 {%k1} ##
-  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 4, i32 %mask)
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} ##
-  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 5, i32 %mask)
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} ##
-  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 6, i32 %mask)
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmporduw %zmm1, %zmm0, %k0 {%k1} ##
-  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i8 7, i32 %mask)
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
 
-declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i8, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
 
 ; CHECK-LABEL: test_x86_mask_blend_b_256
 ; CHECK: vpblendmb

Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=236979&r1=236978&r2=236979&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Mon May 11 04:03:14 2015
@@ -69,28 +69,28 @@ declare i16 @llvm.x86.avx512.mask.pcmpgt
 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
 ; CHECK_LABEL: test_cmp_b_256
 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 ##
-  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 0, i32 -1)
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltb %ymm1, %ymm0, %k0 ##
-  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 1, i32 -1)
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmpleb %ymm1, %ymm0, %k0 ##
-  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 2, i32 -1)
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 ##
-  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 3, i32 -1)
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 ##
-  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 4, i32 -1)
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 ##
-  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 5, i32 -1)
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 ##
-  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 6, i32 -1)
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmpordb %ymm1, %ymm0, %k0 ##
-  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 7, i32 -1)
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
@@ -98,59 +98,59 @@ define <8 x i32> @test_cmp_b_256(<32 x i
 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
 ; CHECK_LABEL: test_mask_cmp_b_256
 ; CHECK: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 0, i32 %mask)
+  %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltb %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 1, i32 %mask)
+  %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 2, i32 %mask)
+  %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 3, i32 %mask)
+  %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 4, i32 %mask)
+  %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 5, i32 %mask)
+  %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 6, i32 %mask)
+  %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmpordb %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 7, i32 %mask)
+  %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
 
-declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i8, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
 
 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
 ; CHECK_LABEL: test_ucmp_b_256
 ; CHECK: vpcmpequb %ymm1, %ymm0, %k0 ##
-  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 0, i32 -1)
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltub %ymm1, %ymm0, %k0 ##
-  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 1, i32 -1)
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmpleub %ymm1, %ymm0, %k0 ##
-  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 2, i32 -1)
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 ##
-  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 3, i32 -1)
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 ##
-  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 4, i32 -1)
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 ##
-  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 5, i32 -1)
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 ##
-  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 6, i32 -1)
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmpordub %ymm1, %ymm0, %k0 ##
-  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 7, i32 -1)
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
@@ -158,59 +158,59 @@ define <8 x i32> @test_ucmp_b_256(<32 x
 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_b_256
 ; CHECK: vpcmpequb %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 0, i32 %mask)
+  %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
 ; CHECK: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 1, i32 %mask)
+  %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
 ; CHECK: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 2, i32 %mask)
+  %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
 ; CHECK: vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 3, i32 %mask)
+  %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
 ; CHECK: vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 4, i32 %mask)
+  %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
 ; CHECK: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 5, i32 %mask)
+  %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
 ; CHECK: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 6, i32 %mask)
+  %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
 ; CHECK: vpcmpordub %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i8 7, i32 %mask)
+  %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   ret <8 x i32> %vec7
 }
 
-declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i8, i32) nounwind readnone
+declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
 
 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK_LABEL: test_cmp_w_256
 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 ##
-  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 0, i16 -1)
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltw %ymm1, %ymm0, %k0 ##
-  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 1, i16 -1)
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmplew %ymm1, %ymm0, %k0 ##
-  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 2, i16 -1)
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 ##
-  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 3, i16 -1)
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 ##
-  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 4, i16 -1)
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 ##
-  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 5, i16 -1)
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 ##
-  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 6, i16 -1)
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordw %ymm1, %ymm0, %k0 ##
-  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 7, i16 -1)
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
@@ -218,59 +218,59 @@ define <8 x i16> @test_cmp_w_256(<16 x i
 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
 ; CHECK_LABEL: test_mask_cmp_w_256
 ; CHECK: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 0, i16 %mask)
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltw %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 1, i16 %mask)
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmplew %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 2, i16 %mask)
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordw %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 3, i16 %mask)
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpneqw %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 4, i16 %mask)
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltw %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 5, i16 %mask)
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnlew %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 6, i16 %mask)
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordw %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 7, i16 %mask)
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
 
-declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i8, i16) nounwind readnone
+declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
 
 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
 ; CHECK_LABEL: test_ucmp_w_256
 ; CHECK: vpcmpequw %ymm1, %ymm0, %k0 ##
-  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 0, i16 -1)
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 ##
-  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 1, i16 -1)
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 ##
-  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 2, i16 -1)
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 ##
-  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 3, i16 -1)
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 ##
-  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 4, i16 -1)
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 ##
-  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 5, i16 -1)
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 ##
-  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 6, i16 -1)
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmporduw %ymm1, %ymm0, %k0 ##
-  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 7, i16 -1)
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
@@ -278,33 +278,33 @@ define <8 x i16> @test_ucmp_w_256(<16 x
 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_w_256
 ; CHECK: vpcmpequw %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 0, i16 %mask)
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 1, i16 %mask)
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleuw %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 2, i16 %mask)
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunorduw %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 3, i16 %mask)
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpnequw %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 4, i16 %mask)
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltuw %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 5, i16 %mask)
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleuw %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 6, i16 %mask)
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmporduw %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i8 7, i16 %mask)
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
 
-declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i8, i16) nounwind readnone
+declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
 
 ; 128-bit
 
@@ -375,28 +375,28 @@ declare i8 @llvm.x86.avx512.mask.pcmpgt.
 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK_LABEL: test_cmp_b_128
 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 ##
-  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 0, i16 -1)
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltb %xmm1, %xmm0, %k0 ##
-  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 1, i16 -1)
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleb %xmm1, %xmm0, %k0 ##
-  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 2, i16 -1)
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 ##
-  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 3, i16 -1)
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 ##
-  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 4, i16 -1)
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 ##
-  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 5, i16 -1)
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 ##
-  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 6, i16 -1)
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordb %xmm1, %xmm0, %k0 ##
-  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 7, i16 -1)
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
@@ -404,59 +404,59 @@ define <8 x i16> @test_cmp_b_128(<16 x i
 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
 ; CHECK_LABEL: test_mask_cmp_b_128
 ; CHECK: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 0, i16 %mask)
+  %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltb %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 1, i16 %mask)
+  %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleb %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 2, i16 %mask)
+  %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordb %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 3, i16 %mask)
+  %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpneqb %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 4, i16 %mask)
+  %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltb %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 5, i16 %mask)
+  %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleb %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 6, i16 %mask)
+  %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordb %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 7, i16 %mask)
+  %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
 
-declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i8, i16) nounwind readnone
+declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
 
 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
 ; CHECK_LABEL: test_ucmp_b_128
 ; CHECK: vpcmpequb %xmm1, %xmm0, %k0 ##
-  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 0, i16 -1)
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltub %xmm1, %xmm0, %k0 ##
-  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 1, i16 -1)
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleub %xmm1, %xmm0, %k0 ##
-  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 2, i16 -1)
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 ##
-  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 3, i16 -1)
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 ##
-  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 4, i16 -1)
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 ##
-  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 5, i16 -1)
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 ##
-  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 6, i16 -1)
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordub %xmm1, %xmm0, %k0 ##
-  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 7, i16 -1)
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
@@ -464,59 +464,59 @@ define <8 x i16> @test_ucmp_b_128(<16 x
 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_b_128
 ; CHECK: vpcmpequb %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 0, i16 %mask)
+  %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
 ; CHECK: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 1, i16 %mask)
+  %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
 ; CHECK: vpcmpleub %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 2, i16 %mask)
+  %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
 ; CHECK: vpcmpunordub %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 3, i16 %mask)
+  %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
 ; CHECK: vpcmpnequb %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 4, i16 %mask)
+  %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
 ; CHECK: vpcmpnltub %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 5, i16 %mask)
+  %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
 ; CHECK: vpcmpnleub %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 6, i16 %mask)
+  %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
 ; CHECK: vpcmpordub %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i8 7, i16 %mask)
+  %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   ret <8 x i16> %vec7
 }
 
-declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i8, i16) nounwind readnone
+declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
 
 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK_LABEL: test_cmp_w_128
 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltw %xmm1, %xmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmplew %xmm1, %xmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordw %xmm1, %xmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -524,59 +524,59 @@ define <8 x i8> @test_cmp_w_128(<8 x i16
 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 ; CHECK_LABEL: test_mask_cmp_w_128
 ; CHECK: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltw %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmplew %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordw %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqw %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltw %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnlew %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordw %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
 ; CHECK_LABEL: test_ucmp_w_128
 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -584,33 +584,33 @@ define <8 x i8> @test_ucmp_w_128(<8 x i1
 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
 ; CHECK_LABEL: test_mask_ucmp_w_128
 ; CHECK: vpcmpequw %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuw %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduw %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequw %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuw %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuw %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduw %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
 
 declare <8 x float> @llvm.x86.fma.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
 

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=236979&r1=236978&r2=236979&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon May 11 04:03:14 2015
@@ -69,28 +69,28 @@ declare i8 @llvm.x86.avx512.mask.pcmpgt.
 define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: test_cmp_d_256
 ; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltd %ymm1, %ymm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpled %ymm1, %ymm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnled %ymm1, %ymm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordd %ymm1, %ymm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -98,59 +98,59 @@ define <8 x i8> @test_cmp_d_256(<8 x i32
 define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_cmp_d_256
 ; CHECK: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltd %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpled %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordd %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqd %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltd %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnled %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordd %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: test_ucmp_d_256
 ; CHECK: vpcmpequd %ymm1, %ymm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltud %ymm1, %ymm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleud %ymm1, %ymm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordud %ymm1, %ymm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -158,59 +158,59 @@ define <8 x i8> @test_ucmp_d_256(<8 x i3
 define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_ucmp_d_256
 ; CHECK: vpcmpequd %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleud %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordud %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequd %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltud %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleud %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordud %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-LABEL: test_cmp_q_256
 ; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltq %ymm1, %ymm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleq %ymm1, %ymm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordq %ymm1, %ymm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -218,59 +218,59 @@ define <8 x i8> @test_cmp_q_256(<4 x i64
 define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_cmp_q_256
 ; CHECK: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltq %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleq %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordq %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqq %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltq %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleq %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordq %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
 ; CHECK-LABEL: test_ucmp_q_256
 ; CHECK: vpcmpequq %ymm1, %ymm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduq %ymm1, %ymm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -278,33 +278,33 @@ define <8 x i8> @test_ucmp_q_256(<4 x i6
 define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_ucmp_q_256
 ; CHECK: vpcmpequq %ymm1, %ymm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuq %ymm1, %ymm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduq %ymm1, %ymm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequq %ymm1, %ymm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuq %ymm1, %ymm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuq %ymm1, %ymm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduq %ymm1, %ymm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone
 
 ; 128-bit
 
@@ -375,28 +375,28 @@ declare i8 @llvm.x86.avx512.mask.pcmpgt.
 define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_cmp_d_128
 ; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltd %xmm1, %xmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpled %xmm1, %xmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnled %xmm1, %xmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordd %xmm1, %xmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -404,59 +404,59 @@ define <8 x i8> @test_cmp_d_128(<4 x i32
 define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_cmp_d_128
 ; CHECK: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltd %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpled %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordd %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqd %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltd %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnled %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordd %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
 ; CHECK-LABEL: test_ucmp_d_128
 ; CHECK: vpcmpequd %xmm1, %xmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltud %xmm1, %xmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleud %xmm1, %xmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordud %xmm1, %xmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -464,59 +464,59 @@ define <8 x i8> @test_ucmp_d_128(<4 x i3
 define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_ucmp_d_128
 ; CHECK: vpcmpequd %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleud %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordud %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequd %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltud %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleud %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordud %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_cmp_q_128
 ; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltq %xmm1, %xmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleq %xmm1, %xmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordq %xmm1, %xmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -524,59 +524,59 @@ define <8 x i8> @test_cmp_q_128(<2 x i64
 define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_cmp_q_128
 ; CHECK: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltq %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleq %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunordq %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpneqq %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltq %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleq %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmpordq %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
 
 define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
 ; CHECK-LABEL: test_ucmp_q_128
 ; CHECK: vpcmpequq %xmm1, %xmm0, %k0 ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 0, i8 -1)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 1, i8 -1)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 2, i8 -1)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 3, i8 -1)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 4, i8 -1)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 -1)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 6, i8 -1)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduq %xmm1, %xmm0, %k0 ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 7, i8 -1)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
@@ -584,33 +584,33 @@ define <8 x i8> @test_ucmp_q_128(<2 x i6
 define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
 ; CHECK-LABEL: test_mask_ucmp_q_128
 ; CHECK: vpcmpequq %xmm1, %xmm0, %k0 {%k1} ##
-  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 0, i8 %mask)
+  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
 ; CHECK: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ##
-  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 1, i8 %mask)
+  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
 ; CHECK: vpcmpleuq %xmm1, %xmm0, %k0 {%k1} ##
-  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 2, i8 %mask)
+  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
 ; CHECK: vpcmpunorduq %xmm1, %xmm0, %k0 {%k1} ##
-  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 3, i8 %mask)
+  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
 ; CHECK: vpcmpnequq %xmm1, %xmm0, %k0 {%k1} ##
-  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 4, i8 %mask)
+  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
 ; CHECK: vpcmpnltuq %xmm1, %xmm0, %k0 {%k1} ##
-  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 5, i8 %mask)
+  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
 ; CHECK: vpcmpnleuq %xmm1, %xmm0, %k0 {%k1} ##
-  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 6, i8 %mask)
+  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
 ; CHECK: vpcmporduq %xmm1, %xmm0, %k0 {%k1} ##
-  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i8 7, i8 %mask)
+  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   ret <8 x i8> %vec7
 }
 
-declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i8, i8) nounwind readnone
+declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone
 
 ; CHECK-LABEL: compr1
 ; CHECK: vcompresspd %zmm0





More information about the llvm-commits mailing list