[llvm] r302018 - Support arbitrary address space pointers in masked gather/scatter intrinsics.

Wed May 3 05:28:54 PDT 2017

Author: eladcohen
Date: Wed May  3 07:28:54 2017
New Revision: 302018

URL: http://llvm.org/viewvc/llvm-project?rev=302018&view=rev
Log:
Support arbitrary address space pointers in masked gather/scatter intrinsics.

Fixes PR31789 - When loop-vectorize tries to use these intrinsics for a
non-default address space pointer we fail with a "Calling a function with a
bad singature!" assertion. This patch solves this by adding the 'vector of
pointers' argument as an overloaded type which will determine the address
space.

Differential revision: https://reviews.llvm.org/D31490


Added:
    llvm/trunk/test/Verifier/scatter_gather.ll
Modified:
    llvm/trunk/docs/LangRef.rst
    llvm/trunk/include/llvm/IR/Intrinsics.h
    llvm/trunk/include/llvm/IR/Intrinsics.td
    llvm/trunk/lib/IR/AutoUpgrade.cpp
    llvm/trunk/lib/IR/Function.cpp
    llvm/trunk/lib/IR/IRBuilder.cpp
    llvm/trunk/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
    llvm/trunk/test/Analysis/CostModel/X86/vector_gep.ll
    llvm/trunk/test/Assembler/auto_upgrade_intrinsics.ll
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/trunk/test/Transforms/FunctionAttrs/readattrs.ll
    llvm/trunk/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
    llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll
    llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
    llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll
    llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll
    llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll
    llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp

Modified: llvm/trunk/docs/LangRef.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/LangRef.rst?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================

--- llvm/trunk/docs/LangRef.rst (original)
+++ llvm/trunk/docs/LangRef.rst Wed May  3 07:28:54 2017
@@ -7915,7 +7915,7 @@ makes sense:
     ; get pointers for 8 elements from array B
     %ptrs = getelementptr double, double* %B, <8 x i32> %C
     ; load 8 elements from array B into A
-    %A = call <8 x double> @llvm.masked.gather.v8f64(<8 x double*> %ptrs,
+    %A = call <8 x double> @llvm.masked.gather.v8f64.v8p0f64(<8 x double*> %ptrs,
          i32 8, <8 x i1> %mask, <8 x double> %passthru)
 
 Conversion Operations
@@ -12024,9 +12024,9 @@ This is an overloaded intrinsic. The loa
 
 ::
 
-      declare <16 x float> @llvm.masked.gather.v16f32   (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
-      declare <2 x double> @llvm.masked.gather.v2f64    (<2 x double*> <ptrs>, i32 <alignment>, <2 x i1>  <mask>, <2 x double> <passthru>)
-      declare <8 x float*> @llvm.masked.gather.v8p0f32  (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1>  <mask>, <8 x float*> <passthru>)
+      declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32   (<16 x float*> <ptrs>, i32 <alignment>, <16 x i1> <mask>, <16 x float> <passthru>)
+      declare <2 x double> @llvm.masked.gather.v2f64.v2p1f64     (<2 x double addrspace(1)*> <ptrs>, i32 <alignment>, <2 x i1>  <mask>, <2 x double> <passthru>)
+      declare <8 x float*> @llvm.masked.gather.v8p0f32.v8p0p0f32 (<8 x float**> <ptrs>, i32 <alignment>, <8 x i1>  <mask>, <8 x float*> <passthru>)
 
 Overview:
 """""""""
@@ -12049,7 +12049,7 @@ The semantics of this operation are equi
 
 ::
 
-       %res = call <4 x double> @llvm.masked.gather.v4f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+       %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64 (<4 x double*> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
 
        ;; The gather with all-true mask is equivalent to the following instruction sequence
        %ptr0 = extractelement <4 x double*> %ptrs, i32 0
@@ -12078,9 +12078,9 @@ This is an overloaded intrinsic. The dat
 
 ::
 
-       declare void @llvm.masked.scatter.v8i32   (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
-       declare void @llvm.masked.scatter.v16f32  (<16 x float>  <value>, <16 x float*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
-       declare void @llvm.masked.scatter.v4p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
+       declare void @llvm.masked.scatter.v8i32.v8p0i32     (<8 x i32>     <value>, <8 x i32*>     <ptrs>, i32 <alignment>, <8 x i1>  <mask>)
+       declare void @llvm.masked.scatter.v16f32.v16p1f32   (<16 x float>  <value>, <16 x float addrspace(1)*>  <ptrs>, i32 <alignment>, <16 x i1> <mask>)
+       declare void @llvm.masked.scatter.v4p0f64.v4p0p0f64 (<4 x double*> <value>, <4 x double**> <ptrs>, i32 <alignment>, <4 x i1>  <mask>)
 
 Overview:
 """""""""
@@ -12101,7 +12101,7 @@ The '``llvm.masked.scatter``' intrinsics
 ::
 
        ;; This instruction unconditionally stores data vector in multiple addresses
-       call @llvm.masked.scatter.v8i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4,  <8 x i1>  <true, true, .. true>)
+       call @llvm.masked.scatter.v8i32.v8p0i32 (<8 x i32> %value, <8 x i32*> %ptrs, i32 4,  <8 x i1>  <true, true, .. true>)
 
        ;; It is equivalent to a list of scalar stores
        %val0 = extractelement <8 x i32> %value, i32 0

Modified: llvm/trunk/include/llvm/IR/Intrinsics.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.h?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.h (original)
+++ llvm/trunk/include/llvm/IR/Intrinsics.h Wed May  3 07:28:54 2017
@@ -100,7 +100,7 @@ namespace Intrinsic {
       Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
       Integer, Vector, Pointer, Struct,
       Argument, ExtendArgument, TruncArgument, HalfVecArgument,
-      SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfPtrsToElt
+      SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfAnyPtrsToElt
     } Kind;
 
     union {
@@ -119,25 +119,43 @@ namespace Intrinsic {
       AK_AnyVector,
       AK_AnyPointer
     };
+
     unsigned getArgumentNumber() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
              Kind == SameVecWidthArgument || Kind == PtrToArgument ||
-             Kind == PtrToElt || Kind == VecOfPtrsToElt);
+             Kind == PtrToElt);
       return Argument_Info >> 3;
     }
     ArgKind getArgumentKind() const {
       assert(Kind == Argument || Kind == ExtendArgument ||
              Kind == TruncArgument || Kind == HalfVecArgument ||
-             Kind == SameVecWidthArgument || Kind == PtrToArgument ||
-             Kind == VecOfPtrsToElt);
+             Kind == SameVecWidthArgument || Kind == PtrToArgument);
       return (ArgKind)(Argument_Info & 7);
     }
 
+    // VecOfAnyPtrsToElt uses both an overloaded argument (for address space)
+    // and a reference argument (for matching vector width and element types)
+    unsigned getOverloadArgNumber() const {
+      assert(Kind == VecOfAnyPtrsToElt);
+      return Argument_Info >> 16;
+    }
+    unsigned getRefArgNumber() const {
+      assert(Kind == VecOfAnyPtrsToElt);
+      return Argument_Info & 0xFFFF;
+    }
+
     static IITDescriptor get(IITDescriptorKind K, unsigned Field) {
       IITDescriptor Result = { K, { Field } };
       return Result;
     }
+
+    static IITDescriptor get(IITDescriptorKind K, unsigned short Hi,
+                             unsigned short Lo) {
+      unsigned Field = Hi << 16 | Lo;
+      IITDescriptor Result = {K, {Field}};
+      return Result;
+    }
   };
 
   /// Return the IIT table descriptor for the specified intrinsic into an array

Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
+++ llvm/trunk/include/llvm/IR/Intrinsics.td Wed May  3 07:28:54 2017
@@ -155,7 +155,7 @@ class LLVMVectorSameWidth<int num, LLVMT
 }
 class LLVMPointerTo<int num> : LLVMMatchType<num>;
 class LLVMPointerToElt<int num> : LLVMMatchType<num>;
-class LLVMVectorOfPointersToElt<int num> : LLVMMatchType<num>;
+class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
 
 // Match the type of another intrinsic parameter that is expected to be a
 // vector type, but change the element count to be half as many
@@ -761,14 +761,14 @@ def int_masked_load  : Intrinsic<[llvm_a
                                  [IntrReadMem, IntrArgMemOnly]>;
 
 def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
-                                 [LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
+                                 [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
                                   LLVMVectorSameWidth<0, llvm_i1_ty>,
                                   LLVMMatchType<0>],
                                  [IntrReadMem]>;
 
 def int_masked_scatter: Intrinsic<[],
                                   [llvm_anyvector_ty,
-                                   LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
+                                   LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
                                    LLVMVectorSameWidth<0, llvm_i1_ty>]>;
 
 def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],

Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Wed May  3 07:28:54 2017
@@ -467,6 +467,27 @@ static bool UpgradeIntrinsicFunction1(Fu
         return true;
       }
     }
+    // Renaming gather/scatter intrinsics with no address space overloading
+    // to the new overload which includes an address space
+    if (Name.startswith("masked.gather.")) {
+      Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
+      if (F->getName() != Intrinsic::getName(Intrinsic::masked_gather, Tys)) {
+        rename(F);
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          Intrinsic::masked_gather, Tys);
+        return true;
+      }
+    }
+    if (Name.startswith("masked.scatter.")) {
+      auto Args = F->getFunctionType()->params();
+      Type *Tys[] = {Args[0], Args[1]};
+      if (F->getName() != Intrinsic::getName(Intrinsic::masked_scatter, Tys)) {
+        rename(F);
+        NewFn = Intrinsic::getDeclaration(F->getParent(),
+                                          Intrinsic::masked_scatter, Tys);
+        return true;
+      }
+    }
     break;
   }
   case 'n': {
@@ -2072,7 +2093,9 @@ void llvm::UpgradeIntrinsicCall(CallInst
   case Intrinsic::invariant_start:
   case Intrinsic::invariant_end:
   case Intrinsic::masked_load:
-  case Intrinsic::masked_store: {
+  case Intrinsic::masked_store:
+  case Intrinsic::masked_gather:
+  case Intrinsic::masked_scatter: {
     SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
                                  CI->arg_operands().end());
     NewCall = Builder.CreateCall(NewFn, Args);

Modified: llvm/trunk/lib/IR/Function.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Function.cpp?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/lib/IR/Function.cpp (original)
+++ llvm/trunk/lib/IR/Function.cpp Wed May  3 07:28:54 2017
@@ -574,13 +574,12 @@ enum IIT_Info {
   IIT_SAME_VEC_WIDTH_ARG = 31,
   IIT_PTR_TO_ARG = 32,
   IIT_PTR_TO_ELT = 33,
-  IIT_VEC_OF_PTRS_TO_ELT = 34,
+  IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
   IIT_I128 = 35,
   IIT_V512 = 36,
   IIT_V1024 = 37
 };
 
-
 static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
                       SmallVectorImpl<Intrinsic::IITDescriptor> &OutputTable) {
   IIT_Info Info = IIT_Info(Infos[NextElt++]);
@@ -716,10 +715,11 @@ static void DecodeIITType(unsigned &Next
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
     return;
   }
-  case IIT_VEC_OF_PTRS_TO_ELT: {
-    unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
-    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt,
-                                             ArgInfo));
+  case IIT_VEC_OF_ANYPTRS_TO_ELT: {
+    unsigned short ArgNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    unsigned short RefNo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+    OutputTable.push_back(
+        IITDescriptor::get(IITDescriptor::VecOfAnyPtrsToElt, ArgNo, RefNo));
     return;
   }
   case IIT_EMPTYSTRUCT:
@@ -808,7 +808,6 @@ static Type *DecodeFixedType(ArrayRef<In
       Elts[i] = DecodeFixedType(Infos, Tys, Context);
     return StructType::get(Context, makeArrayRef(Elts,D.Struct_NumElements));
   }
-
   case IITDescriptor::Argument:
     return Tys[D.getArgumentNumber()];
   case IITDescriptor::ExtendArgument: {
@@ -850,15 +849,9 @@ static Type *DecodeFixedType(ArrayRef<In
     Type *EltTy = VTy->getVectorElementType();
     return PointerType::getUnqual(EltTy);
   }
-  case IITDescriptor::VecOfPtrsToElt: {
-    Type *Ty = Tys[D.getArgumentNumber()];
-    VectorType *VTy = dyn_cast<VectorType>(Ty);
-    if (!VTy)
-      llvm_unreachable("Expected an argument of Vector Type");
-    Type *EltTy = VTy->getVectorElementType();
-    return VectorType::get(PointerType::getUnqual(EltTy),
-                           VTy->getNumElements());
-  }
+  case IITDescriptor::VecOfAnyPtrsToElt:
+    // Return the overloaded type (which determines the pointers address space)
+    return Tys[D.getOverloadArgNumber()];
  }
   llvm_unreachable("unhandled");
 }
@@ -1054,11 +1047,22 @@ bool Intrinsic::matchIntrinsicType(Type
       return (!ThisArgType || !ReferenceType ||
               ThisArgType->getElementType() != ReferenceType->getElementType());
     }
-    case IITDescriptor::VecOfPtrsToElt: {
-      if (D.getArgumentNumber() >= ArgTys.size())
+    case IITDescriptor::VecOfAnyPtrsToElt: {
+      unsigned RefArgNumber = D.getRefArgNumber();
+
+      // This may only be used when referring to a previous argument.
+      if (RefArgNumber >= ArgTys.size())
         return true;
-      VectorType * ReferenceType =
-              dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
+
+      // Record the overloaded type
+      assert(D.getOverloadArgNumber() == ArgTys.size() &&
+             "Table consistency error");
+      ArgTys.push_back(Ty);
+
+      // Verify the overloaded type "matches" the Ref type.
+      // i.e. Ty is a vector with the same width as Ref.
+      // Composed of pointers to the same element type as Ref.
+      VectorType *ReferenceType = dyn_cast<VectorType>(ArgTys[RefArgNumber]);
       VectorType *ThisArgVecTy = dyn_cast<VectorType>(Ty);
       if (!ThisArgVecTy || !ReferenceType ||
           (ReferenceType->getVectorNumElements() !=

Modified: llvm/trunk/lib/IR/IRBuilder.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/IRBuilder.cpp?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/lib/IR/IRBuilder.cpp (original)
+++ llvm/trunk/lib/IR/IRBuilder.cpp Wed May  3 07:28:54 2017
@@ -293,11 +293,13 @@ CallInst *IRBuilderBase::CreateMaskedGat
     Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
                                      NumElts));
 
+  Type *OverloadedTypes[] = {DataTy, PtrsTy};
   Value * Ops[] = {Ptrs, getInt32(Align), Mask, UndefValue::get(DataTy)};
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
-  return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, { DataTy }, Name);
+  return CreateMaskedIntrinsic(Intrinsic::masked_gather, Ops, OverloadedTypes,
+                               Name);
 }
 
 /// \brief Create a call to a Masked Scatter intrinsic.
@@ -323,11 +325,13 @@ CallInst *IRBuilderBase::CreateMaskedSca
   if (!Mask)
     Mask = Constant::getAllOnesValue(VectorType::get(Type::getInt1Ty(Context),
                                      NumElts));
+
+  Type *OverloadedTypes[] = {DataTy, PtrsTy};
   Value * Ops[] = {Data, Ptrs, getInt32(Align), Mask};
 
   // We specify only one type when we create this intrinsic. Types of other
   // arguments are derived from this type.
-  return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, { DataTy });
+  return CreateMaskedIntrinsic(Intrinsic::masked_scatter, Ops, OverloadedTypes);
 }
 
 template <typename T0, typename T1, typename T2, typename T3>

Modified: llvm/trunk/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll Wed May  3 07:28:54 2017
@@ -78,10 +78,10 @@ define <2 x double> @test_gather_2f64(<2
 ; SKX-LABEL: test_gather_2f64
 ; SKX: Found an estimated cost of 7 {{.*}}.gather
 
-%res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
+%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0)
   ret <2 x double> %res
 }
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
 
 define <4 x i32> @test_gather_4i32(<4 x i32*> %ptrs, <4 x i1> %mask, <4 x i32> %src0)  {
 
@@ -94,7 +94,7 @@ define <4 x i32> @test_gather_4i32(<4 x
 ; SKX-LABEL: test_gather_4i32
 ; SKX: Found an estimated cost of 6 {{.*}}.gather
 
-%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
+%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0)
   ret <4 x i32> %res
 }
 
@@ -109,10 +109,10 @@ define <4 x i32> @test_gather_4i32_const
 ; SKX-LABEL: test_gather_4i32_const_mask
 ; SKX: Found an estimated cost of 6 {{.*}}.gather
 
-%res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
+%res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> %src0)
   ret <4 x i32> %res
 }
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32, <4 x i1> %mask, <4 x i32> %src0)
 
 define <16 x float> @test_gather_16f32_const_mask(float* %base, <16 x i32> %ind) {
 
@@ -128,7 +128,7 @@ define <16 x float> @test_gather_16f32_c
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -146,7 +146,7 @@ define <16 x float> @test_gather_16f32_v
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.v = getelementptr float, float* %base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -164,7 +164,7 @@ define <16 x float> @test_gather_16f32_r
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.v = getelementptr float, <16 x float*> %ptrs, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -185,7 +185,7 @@ define <16 x float> @test_gather_16f32_c
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -204,7 +204,7 @@ define void @test_scatter_16i32(i32* %ba
 
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
   %imask = bitcast i16 %mask to <16 x i1>
-  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
   ret void
 }
 
@@ -218,11 +218,11 @@ define void @test_scatter_8i32(<8 x i32>
 ; SKX-LABEL: test_scatter_8i32
 ; SKX: Found an estimated cost of 10 {{.*}}.scatter
 
-  call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
+  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> %mask)
   ret void
 }
 
-declare void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
+declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32, <8 x i1> %mask)
 
 define void @test_scatter_4i32(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ; AVX2-LABEL: test_scatter_4i32
@@ -234,7 +234,7 @@ define void @test_scatter_4i32(<4 x i32>
 ; SKX-LABEL: test_scatter_4i32
 ; SKX: Found an estimated cost of 6 {{.*}}.scatter
 
-  call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
   ret void
 }
 
@@ -252,7 +252,7 @@ define <4 x float> @test_gather_4f32(flo
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
 
-  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef)
   ret <4 x float>%res
 }
 
@@ -270,14 +270,14 @@ define <4 x float> @test_gather_4f32_con
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.v = getelementptr float, float* %ptr, <4 x i64> %sext_ind
 
-  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef)
   ret <4 x float>%res
 }
 
-declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
-declare void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
-declare void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
-declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.v, i32, <4 x i1> %mask, <4 x float> )
+declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32, <4 x i1> %mask)
+declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32, <16 x i1> %imask)
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.v, i32, <16 x i1> %mask, <16 x float>)
 
 declare <16 x i32> @llvm.masked.load.v16i32.p0v16i32(<16 x i32>*, i32, <16 x i1>, <16 x i32>)
 declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)

Modified: llvm/trunk/test/Analysis/CostModel/X86/vector_gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/vector_gep.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/vector_gep.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/vector_gep.ll Wed May  3 07:28:54 2017
@@ -3,7 +3,7 @@
 %struct.S = type { [1000 x i32] }
 
 
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
 
 define <4 x i32> @foov(<4 x %struct.S*> %s, i64 %base){
   %temp = insertelement <4 x i64> undef, i64 %base, i32 0
@@ -12,6 +12,6 @@ define <4 x i32> @foov(<4 x %struct.S*>
   %B = getelementptr inbounds %struct.S, <4 x %struct.S*> %s, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
 ;CHECK: cost of 0 for instruction: {{.*}} getelementptr inbounds [1000 x i32]
   %arrayidx = getelementptr inbounds [1000 x i32], <4 x [1000 x i32]*> %B, <4 x i64> zeroinitializer, <4 x i64> %vector
-  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
+  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %arrayidx, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
   ret <4 x i32> %res
 }

Modified: llvm/trunk/test/Assembler/auto_upgrade_intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Assembler/auto_upgrade_intrinsics.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Assembler/auto_upgrade_intrinsics.ll (original)
+++ llvm/trunk/test/Assembler/auto_upgrade_intrinsics.ll Wed May  3 07:28:54 2017
@@ -85,6 +85,23 @@ define void @tests.masked.store(<2 x dou
   ret void
 }
 
+declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+
+define <2 x double> @tests.masked.gather(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %passthru)  {
+; CHECK-LABEL: @tests.masked.gather(
+; CHECK: @llvm.masked.gather.v2f64.v2p0f64
+  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptr, i32 1, <2 x i1> %mask, <2 x double> %passthru)
+  ret <2 x double> %res
+}
+
+declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
+
+define void @tests.masked.scatter(<2 x double*> %ptr, <2 x i1> %mask, <2 x double> %val)  {
+; CHECK-LABEL: @tests.masked.scatter(
+; CHECK: @llvm.masked.scatter.v2f64.v2p0f64
+  call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptr, i32 3, <2 x i1> %mask)
+  ret void
+}
 
 declare {}* @llvm.invariant.start(i64, i8* nocapture) nounwind readonly
 declare void @llvm.invariant.end({}*, i64, i8* nocapture) nounwind

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Wed May  3 07:28:54 2017
@@ -54,13 +54,13 @@ define <16 x float> @test1(float* %base,
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
-declare <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
-declare <16 x float> @llvm.masked.gather.v16f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
-declare <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32, <8 x i1> , <8 x i32> )
 
 
 ; SCALAR-LABEL: test2
@@ -111,7 +111,7 @@ define <16 x float> @test2(float* %base,
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
   %imask = bitcast i16 %mask to <16 x i1>
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float>undef)
   ret <16 x float> %res
 }
 
@@ -152,7 +152,7 @@ define <16 x i32> @test3(i32* %base, <16
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
   %imask = bitcast i16 %mask to <16 x i1>
-  %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
   ret <16 x i32> %res
 }
 
@@ -205,8 +205,8 @@ define <16 x i32> @test4(i32* %base, <16
 
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
   %imask = bitcast i16 %mask to <16 x i1>
-  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
-  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
   %res = add <16 x i32> %gt1, %gt2
   ret <16 x i32> %res
 }
@@ -270,13 +270,13 @@ define void @test5(i32* %base, <16 x i32
 
   %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
   %imask = bitcast i16 %mask to <16 x i1>
-  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
-  call void @llvm.masked.scatter.v16i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>%val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
   ret void
 }
 
-declare void @llvm.masked.scatter.v8i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
-declare void @llvm.masked.scatter.v16i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
+declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> , <8 x i32*> , i32 , <8 x i1> )
+declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> , <16 x i32*> , i32 , <16 x i1> )
 
 
 ; SCALAR-LABEL: test6
@@ -326,9 +326,9 @@ define <8 x i32> @test6(<8 x i32>%a1, <8
 ; SKX_32-NEXT:    vmovdqa %ymm2, %ymm0
 ; SKX_32-NEXT:    retl
 
-  %a = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
 
-  call void @llvm.masked.scatter.v8i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
   ret <8 x i32>%a
 }
 
@@ -384,8 +384,8 @@ define <8 x i32> @test7(i32* %base, <8 x
 
   %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
   %imask = bitcast i8 %mask to <8 x i1>
-  %gt1 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
-  %gt2 = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
+  %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>undef)
+  %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32>%gt1)
   %res = add <8 x i32> %gt1, %gt2
   ret <8 x i32> %res
 }
@@ -444,8 +444,8 @@ define <16 x i32> @test8(<16 x i32*> %pt
 ; SKX_32-NEXT:    retl
 
   %imask = bitcast i16 %mask to <16 x i1>
-  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
-  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
+  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>undef)
+  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32>%gt1)
   %res = add <16 x i32> %gt1, %gt2
   ret <16 x i32> %res
 }
@@ -522,7 +522,7 @@ entry:
   %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
 
   %arrayidx = getelementptr  %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32><i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13>
-  %res = call <8 x i32 >  @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  %res = call <8 x i32 >  @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
   ret <8 x i32> %res
 }
 
@@ -591,7 +591,7 @@ entry:
   %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer
 
   %arrayidx = getelementptr  %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
-  %res = call <8 x i32 >  @llvm.masked.gather.v8i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+  %res = call <8 x i32 >  @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>%arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
   ret <8 x i32> %res
 }
 
@@ -632,7 +632,7 @@ define <16 x float> @test11(float* %base
 
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -671,7 +671,7 @@ define <16 x float> @test12(float* %base
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -710,7 +710,7 @@ define <16 x float> @test13(float* %base
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
@@ -772,13 +772,13 @@ define <16 x float> @test14(float* %base
 
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
   ret <16 x float>%res
 }
 
-declare <4 x float> @llvm.masked.gather.v4f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
-declare <4 x double> @llvm.masked.gather.v4f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
+declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)
 
 ; Gather smaller than existing instruction
 define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
@@ -831,7 +831,7 @@ define <4 x float> @test15(float* %base,
 
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind
-  %res = call <4 x float> @llvm.masked.gather.v4f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
+  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
   ret <4 x float>%res
 }
 
@@ -890,7 +890,7 @@ define <4 x double> @test16(double* %bas
 
   %sext_ind = sext <4 x i32> %ind to <4 x i64>
   %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind
-  %res = call <4 x double> @llvm.masked.gather.v4f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
+  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
   ret <4 x double>%res
 }
 
@@ -942,15 +942,15 @@ define <2 x double> @test17(double* %bas
 
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind
-  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
+  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
   ret <2 x double>%res
 }
 
-declare void @llvm.masked.scatter.v4i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
-declare void @llvm.masked.scatter.v4f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
-declare void @llvm.masked.scatter.v2i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
-declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
-declare void @llvm.masked.scatter.v2f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> , <4 x i32*> , i32 , <4 x i1> )
+declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> , <4 x double*> , i32 , <4 x i1> )
+declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64> , <2 x i64*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> , <2 x float*> , i32 , <2 x i1> )
 
 define void @test18(<4 x i32>%a1, <4 x i32*> %ptr, <4 x i1>%mask) {
 ;
@@ -995,7 +995,7 @@ define void @test18(<4 x i32>%a1, <4 x i
 ; SKX_32-NEXT:    vptestmd %xmm2, %xmm2, %k1
 ; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v4i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
+  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
   ret void
 }
 
@@ -1049,7 +1049,7 @@ define void @test19(<4 x double>%a1, dou
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
   %gep = getelementptr double, double* %ptr, <4 x i64> %ind
-  call void @llvm.masked.scatter.v4f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
+  call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
   ret void
 }
 
@@ -1103,7 +1103,7 @@ define void @test20(<2 x float>%a1, <2 x
 ; SKX_32-NEXT:    kshiftrb $6, %k0, %k1
 ; SKX_32-NEXT:    vscatterdps %xmm0, (,%xmm1) {%k1}
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v2f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
+  call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
   ret void
 }
 
@@ -1157,12 +1157,12 @@ define void @test21(<2 x i32>%a1, <2 x i
 ; SKX_32-NEXT:    vpscatterqd %xmm0, (,%ymm1) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
   ret void
 }
 
 ; The result type requires widening
-declare <2 x float> @llvm.masked.gather.v2f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
+declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)
 
 define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) {
 ;
@@ -1222,12 +1222,12 @@ define <2 x float> @test22(float* %base,
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
-  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
+  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
   ret <2 x float>%res
 }
 
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
-declare <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)
 
 define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
 ;
@@ -1276,7 +1276,7 @@ define <2 x i32> @test23(i32* %base, <2
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
-  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
+  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
   ret <2 x i32>%res
 }
 
@@ -1320,7 +1320,7 @@ define <2 x i32> @test24(i32* %base, <2
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
-  %res = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
+  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
   ret <2 x i32>%res
 }
 
@@ -1371,7 +1371,7 @@ define <2 x i64> @test25(i64* %base, <2
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
-  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
+  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
   ret <2 x i64>%res
 }
 
@@ -1418,7 +1418,7 @@ define <2 x i64> @test26(i64* %base, <2
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
-  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
+  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
   ret <2 x i64>%res
 }
 
@@ -1466,7 +1466,7 @@ define <2 x float> @test27(float* %base,
 ; SKX_32-NEXT:    retl
   %sext_ind = sext <2 x i32> %ind to <2 x i64>
   %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
-  %res = call <2 x float> @llvm.masked.gather.v2f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
+  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
   ret <2 x float>%res
 }
 
@@ -1515,7 +1515,7 @@ define void @test28(<2 x i32>%a1, <2 x i
 ; SKX_32-NEXT:    vpscatterqd %xmm0, (,%ymm1) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
   ret void
 }
 
@@ -1568,23 +1568,23 @@ define <16 x float> @test29(float* %base
   %sext_ind = sext <16 x i32> %ind to <16 x i64>
   %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
 
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
   ret <16 x float>%res
 }
 
 ; Check non-power-of-2 case. It should be scalarized.
-declare <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
+declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
 define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
 ; ALL-LABEL: test30:
 ; ALL-NOT:       gather
 
   %sext_ind = sext <3 x i32> %ind to <3 x i64>
   %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
-  %res = call <3 x i32> @llvm.masked.gather.v3i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
+  %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
   ret <3 x i32>%res
 }
 
-declare <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
+declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
 
 ; KNL-LABEL: test31
 ; KNL: vpgatherqq
@@ -1626,7 +1626,7 @@ define <16 x float*> @test31(<16 x float
 ; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; SKX_32-NEXT:    retl
 
-  %res = call <16 x float*> @llvm.masked.gather.v16p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
+  %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
   ret <16 x float*>%res
 }
 
@@ -1672,7 +1672,7 @@ define <16 x i32> @test_gather_16i32(<16
 ; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
 ; SKX_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 ; SKX_32-NEXT:    retl
-  %res = call <16 x i32> @llvm.masked.gather.v16i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
+  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
   ret <16 x i32> %res
 }
 define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
@@ -1749,10 +1749,10 @@ define <16 x i64> @test_gather_16i64(<16
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    retl
-  %res = call <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
+  %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
   ret <16 x i64> %res
 }
-declare <16 x i64> @llvm.masked.gather.v16i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
+declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
 define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0)  {
 ; KNL_64-LABEL: test_gather_16f32:
 ; KNL_64:       # BB#0:
@@ -1795,7 +1795,7 @@ define <16 x float> @test_gather_16f32(<
 ; SKX_32-NEXT:    vgatherdps (,%zmm0), %zmm2 {%k1}
 ; SKX_32-NEXT:    vmovaps %zmm2, %zmm0
 ; SKX_32-NEXT:    retl
-  %res = call <16 x float> @llvm.masked.gather.v16f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
   ret <16 x float> %res
 }
 define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
@@ -1872,10 +1872,10 @@ define <16 x double> @test_gather_16f64(
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    retl
-  %res = call <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
+  %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
   ret <16 x double> %res
 }
-declare <16 x double> @llvm.masked.gather.v16f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
+declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
 define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0)  {
 ; KNL_64-LABEL: test_scatter_16i32:
 ; KNL_64:       # BB#0:
@@ -1918,7 +1918,7 @@ define void @test_scatter_16i32(<16 x i3
 ; SKX_32-NEXT:    vpscatterdd %zmm2, (,%zmm0) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
 define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
@@ -1993,10 +1993,10 @@ define void @test_scatter_16i64(<16 x i6
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.scatter.v16i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
+declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
 define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0)  {
 ; KNL_64-LABEL: test_scatter_16f32:
 ; KNL_64:       # BB#0:
@@ -2039,10 +2039,10 @@ define void @test_scatter_16f32(<16 x fl
 ; SKX_32-NEXT:    vscatterdps %zmm2, (,%zmm0) {%k1}
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.scatter.v16f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
+declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
 define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
 ; KNL_64-LABEL: test_scatter_16f64:
 ; KNL_64:       # BB#0:
@@ -2115,10 +2115,10 @@ define void @test_scatter_16f64(<16 x do
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    vzeroupper
 ; SKX_32-NEXT:    retl
-  call void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
+  call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
   ret void
 }
-declare void @llvm.masked.scatter.v16f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
+declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)
 
 define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2,<4 x i64> %d) {
 ; KNL_64-LABEL: test_pr28312:
@@ -2193,11 +2193,11 @@ define <4 x i64> @test_pr28312(<4 x i64*
 ; SKX_32-NEXT:    movl %ebp, %esp
 ; SKX_32-NEXT:    popl %ebp
 ; SKX_32-NEXT:    retl
-  %g1 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
-  %g2 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
-  %g3 = call <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
+  %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
+  %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
+  %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
   %a = add <4 x i64> %g1, %g2
   %b = add <4 x i64> %a, %g3
   ret <4 x i64> %b
 }
-declare <4 x i64> @llvm.masked.gather.v4i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)
+declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)

Modified: llvm/trunk/test/Transforms/FunctionAttrs/readattrs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/FunctionAttrs/readattrs.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/FunctionAttrs/readattrs.ll (original)
+++ llvm/trunk/test/Transforms/FunctionAttrs/readattrs.ll Wed May  3 07:28:54 2017
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -functionattrs -S | FileCheck %s
 ; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s
 @x = global i32 0
@@ -68,22 +69,22 @@ entry:
 }
 
 ; CHECK: declare void @llvm.masked.scatter
-declare void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
+declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*>, i32, <4 x i1>)
 
 ; CHECK-NOT: readnone
 ; CHECK-NOT: readonly
 ; CHECK: define void @test9
 define void @test9(<4 x i32*> %ptrs, <4 x i32>%val) {
-  call void @llvm.masked.scatter.v4i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
+  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>%val, <4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>)
   ret void
 }
 
 ; CHECK: declare <4 x i32> @llvm.masked.gather
-declare <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
+declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
 ; CHECK: readonly
 ; CHECK: define <4 x i32> @test10
 define <4 x i32> @test10(<4 x i32*> %ptrs) {
-  %res = call <4 x i32> @llvm.masked.gather.v4i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
+  %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1><i1 true, i1 false, i1 true, i1 false>, <4 x i32>undef)
   ret <4 x i32> %res
 }
 

Modified: llvm/trunk/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll (original)
+++ llvm/trunk/test/Transforms/GVN/2016-08-30-MaskedScatterGather.ll Wed May  3 07:28:54 2017
@@ -1,7 +1,7 @@
 ; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
 
-declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
 
 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
 ; do not have pointer aliasing ignored when being processed.
@@ -20,18 +20,18 @@ entry:
   %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
   %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
   ; Read from in1 and in2
-  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
-  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in1 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in1 from the allocas
   ; This gather should alias the scatter we just saw
-  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in2 from the allocas
   ; This gather should alias the scatter we just saw, and not be eliminated
-  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to out for good measure
   %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
   %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1

Modified: llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/masked_intrinsics.ll Wed May  3 07:28:54 2017
@@ -2,8 +2,8 @@
 
 declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
 declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
-declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
-declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
+declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
+declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
 
 define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru)  {
   %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
@@ -49,7 +49,7 @@ define void @store_onemask(<2 x double>*
 }
 
 define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru)  {
-  %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
+  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
   ret <2 x double> %res
 
 ; CHECK-LABEL: @gather_zeromask(
@@ -57,7 +57,7 @@ define <2 x double> @gather_zeromask(<2
 }
 
 define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val)  {
-  call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
+  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
   ret void
 
 ; CHECK-LABEL: @scatter_zeromask(

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/consecutive-ptr-uniforms.ll Wed May  3 07:28:54 2017
@@ -36,7 +36,7 @@ target triple = "x86_64-unknown-linux-gn
 ; CHECK-NEXT:    [[WIDE_VEC1:%.*]] = load <80 x float>, <80 x float>* [[TMP4]], align 4
 ; CHECK-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <80 x float> [[WIDE_VEC1]], <80 x float> undef, <16 x i32> <i32 0, i32 5, i32 10, i32 15, i32 20, i32 25, i32 30, i32 35, i32 40, i32 45, i32 50, i32 55, i32 60, i32 65, i32 70, i32 75>
 ; CHECK-NEXT:    [[TMP5:%.*]] = fadd <16 x float> [[STRIDED_VEC2]], [[TMP2]]
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> [[TMP5]], <16 x float*> [[TMP3]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80, i64 80>
 ; CHECK:         br i1 {{.*}}, label %middle.block, label %vector.body

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/gather_scatter.ll Wed May  3 07:28:54 2017
@@ -17,9 +17,9 @@ target triple = "x86_64-pc_linux"
 ;}
 
 ;AVX512-LABEL: @foo1
-;AVX512: llvm.masked.load.v16i32
-;AVX512: llvm.masked.gather.v16f32
-;AVX512: llvm.masked.store.v16f32
+;AVX512: llvm.masked.load.v16i32.p0v16i32
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
+;AVX512: llvm.masked.store.v16f32.p0v16f32
 ;AVX512: ret void
 
 ; Function Attrs: nounwind uwtable
@@ -96,8 +96,8 @@ for.end:
 
 ;AVX512-LABEL: @foo2
 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
-;AVX512: llvm.masked.gather.v16f32
-;AVX512: llvm.masked.scatter.v16f32
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
+;AVX512: llvm.masked.scatter.v16f32.v16p0f32
 ;AVX512: ret void
 define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
 entry:
@@ -171,10 +171,10 @@ for.end:
 
 ;AVX512-LABEL: @foo3
 ;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
-;AVX512: llvm.masked.gather.v16f32
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
 ;AVX512: fadd <16 x float>
 ;AVX512: getelementptr inbounds %struct.Out, %struct.Out* %out, <16 x i64> {{.*}}, i32 1
-;AVX512: llvm.masked.scatter.v16f32
+;AVX512: llvm.masked.scatter.v16f32.v16p0f32
 ;AVX512: ret void
 
 %struct.Out = type { float, float }
@@ -233,4 +233,194 @@ for.inc:
 for.end:                                          ; preds = %for.cond
   ret void
 }
-declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
+
+; The same as @foo2 but scatter/gather argument is a vecotr of ptrs with addresspace 1
+
+;AVX512-LABEL: @foo2_addrspace
+;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32.v16p1f32
+;AVX512: llvm.masked.scatter.v16f32.v16p1f32
+;AVX512: ret void
+define void @foo2_addrspace(%struct.In addrspace(1)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
+entry:
+  %in.addr = alloca %struct.In addrspace(1)*, align 8
+  %out.addr = alloca float addrspace(1)*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %index.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8
+  store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32* %index, i32** %index.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 4096
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32*, i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %3 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2
+  %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1
+  %6 = load float, float addrspace(1)* %b, align 4
+  %add = fadd float %6, 5.000000e-01
+  %7 = load i32, i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4
+  store float %add, float addrspace(1)* %arrayidx5, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 16
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Same as foo2_addrspace but here only the input has the non-default address space.
+
+;AVX512-LABEL: @foo2_addrspace2
+;AVX512: getelementptr inbounds %struct.In, %struct.In addrspace(1)* %in, <16 x i64> {{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32.v16p1f32
+;AVX512: llvm.masked.scatter.v16f32.v16p0f32
+;AVX512: ret void
+define void @foo2_addrspace2(%struct.In addrspace(1)* noalias %in, float addrspace(0)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+entry:
+  %in.addr = alloca %struct.In addrspace(1)*, align 8
+  %out.addr = alloca float addrspace(0)*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %index.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store %struct.In addrspace(1)* %in, %struct.In addrspace(1)** %in.addr, align 8
+  store float addrspace(0)* %out, float addrspace(0)** %out.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32* %index, i32** %index.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 4096
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32*, i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %3 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load %struct.In addrspace(1)*, %struct.In addrspace(1)** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %5, i64 %idxprom2
+  %b = getelementptr inbounds %struct.In, %struct.In addrspace(1)* %arrayidx3, i32 0, i32 1
+  %6 = load float, float addrspace(1)* %b, align 4
+  %add = fadd float %6, 5.000000e-01
+  %7 = load i32, i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load float addrspace(0)*, float addrspace(0)** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds float, float addrspace(0)* %8, i64 %idxprom4
+  store float %add, float addrspace(0)* %arrayidx5, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 16
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+; Same as foo2_addrspace but here only the output has the non-default address space.
+
+;AVX512-LABEL: @foo2_addrspace3
+;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> {{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32.v16p0f32
+;AVX512: llvm.masked.scatter.v16f32.v16p1f32
+;AVX512: ret void
+
+define void @foo2_addrspace3(%struct.In addrspace(0)* noalias %in, float addrspace(1)* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+entry:
+  %in.addr = alloca %struct.In addrspace(0)*, align 8
+  %out.addr = alloca float addrspace(1)*, align 8
+  %trigger.addr = alloca i32*, align 8
+  %index.addr = alloca i32*, align 8
+  %i = alloca i32, align 4
+  store %struct.In addrspace(0)* %in, %struct.In addrspace(0)** %in.addr, align 8
+  store float addrspace(1)* %out, float addrspace(1)** %out.addr, align 8
+  store i32* %trigger, i32** %trigger.addr, align 8
+  store i32* %index, i32** %index.addr, align 8
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %0 = load i32, i32* %i, align 4
+  %cmp = icmp slt i32 %0, 4096
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %1 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %1 to i64
+  %2 = load i32*, i32** %trigger.addr, align 8
+  %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+  %3 = load i32, i32* %arrayidx, align 4
+  %cmp1 = icmp sgt i32 %3, 0
+  br i1 %cmp1, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %4 = load i32, i32* %i, align 4
+  %idxprom2 = sext i32 %4 to i64
+  %5 = load %struct.In addrspace(0)*, %struct.In addrspace(0)** %in.addr, align 8
+  %arrayidx3 = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %5, i64 %idxprom2
+  %b = getelementptr inbounds %struct.In, %struct.In addrspace(0)* %arrayidx3, i32 0, i32 1
+  %6 = load float, float addrspace(0)* %b, align 4
+  %add = fadd float %6, 5.000000e-01
+  %7 = load i32, i32* %i, align 4
+  %idxprom4 = sext i32 %7 to i64
+  %8 = load float addrspace(1)*, float addrspace(1)** %out.addr, align 8
+  %arrayidx5 = getelementptr inbounds float, float addrspace(1)* %8, i64 %idxprom4
+  store float %add, float addrspace(1)* %arrayidx5, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %9 = load i32, i32* %i, align 4
+  %inc = add nsw i32 %9, 16
+  store i32 %inc, i32* %i, align 4
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}

Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll (original)
+++ llvm/trunk/test/Transforms/LoopVectorize/X86/scatter_crash.ll Wed May  3 07:28:54 2017
@@ -23,11 +23,11 @@ define void @_Z3fn1v() #0 {
 ; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, <16 x i64> [[VEC_IND]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP12]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP13]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[TMP14:%.*]] = or <16 x i64> [[VEC_IND3]], <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
 ; CHECK-NEXT:    [[TMP15:%.*]] = add nsw <16 x i64> [[TMP10]], [[TMP14]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP11]], <16 x i64> [[TMP15]], i64 0
-; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT:    call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[TMP16]], i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 16
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
 ; CHECK-NEXT:    [[VEC_IND_NEXT4]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>

Modified: llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll (original)
+++ llvm/trunk/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll Wed May  3 07:28:54 2017
@@ -1,8 +1,8 @@
 ; XFAIL: *
 ; RUN: opt < %s -basicaa -newgvn -S | FileCheck %s
 
-declare void @llvm.masked.scatter.v2i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
-declare <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
+declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> , <2 x i32*> , i32 , <2 x i1> )
+declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
 
 ; This test ensures that masked scatter and gather operations, which take vectors of pointers,
 ; do not have pointer aliasing ignored when being processed.
@@ -21,18 +21,18 @@ entry:
   %tmp.i = insertelement <2 x i32*> undef, i32* %tmp.0, i32 0
   %tmp = insertelement <2 x i32*> %tmp.i, i32* %tmp.1, i32 1
   ; Read from in1 and in2
-  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
-  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in1.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in1, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %in2.v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %in2, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in1 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in1.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in1 from the allocas
   ; This gather should alias the scatter we just saw
-  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.0 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to the allocas
-  call void @llvm.masked.scatter.v2i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
+  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %in2.v, <2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>);
   ; Read in2 from the allocas
   ; This gather should alias the scatter we just saw, and not be eliminated
-  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
+  %tmp.v.1 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %tmp, i32 1, <2 x i1> <i1 true, i1 true>, <2 x i32> undef) #1
   ; Store in2 to out for good measure
   %tmp.v.1.0 = extractelement <2 x i32> %tmp.v.1, i32 0
   %tmp.v.1.1 = extractelement <2 x i32> %tmp.v.1, i32 1

Added: llvm/trunk/test/Verifier/scatter_gather.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Verifier/scatter_gather.ll?rev=302018&view=auto
==============================================================================
--- llvm/trunk/test/Verifier/scatter_gather.ll (added)
+++ llvm/trunk/test/Verifier/scatter_gather.ll Wed May  3 07:28:54 2017
@@ -0,0 +1,122 @@
+; RUN: not opt -verify < %s 2>&1 | FileCheck %s
+
+; Mask is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define <16 x float> @gather2(<16 x float*> %ptrs, <16 x i1>* %mask, <16 x float> %passthru) {
+  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1>* %mask, <16 x float> %passthru)
+  ret <16 x float> %res
+}
+declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>*, <16 x float>)
+
+; Mask length != return length
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather3(<8 x float*> %ptrs, <16 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <16 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <16 x i1>, <8 x float>)
+
+; Return type is not a vector
+; CHECK: Intrinsic has incorrect return type!
+define <8 x float>* @gather4(<8 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float>* %res
+}
+declare <8 x float>* @llvm.masked.gather.p0v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)
+
+; Value type is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather5(<8 x float*>* %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>* %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.p0v8p0f32(<8 x float*>*, i32, <8 x i1>, <8 x float>)
+
+; Value type is not a vector of pointers
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather6(<8 x float> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v8f32(<8 x float>, i32, <8 x i1>, <8 x float>)
+
+; Value element type != vector of pointers element
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather7(<8 x double*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0f64(<8 x double*>, i32, <8 x i1>, <8 x float>)
+
+; Value length!= vector of pointers length
+; CHECK: Intrinsic has incorrect argument type!
+define <8 x float> @gather8(<16 x float*> %ptrs, <8 x i1> %mask, <8 x float> %passthru) {
+  %res = call <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*> %ptrs, i32 4, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %res
+}
+declare <8 x float> @llvm.masked.gather.v8f32.v16p0f32(<16 x float*>, i32, <8 x i1>, <8 x float>)
+
+; Passthru type doesn't match return type 
+; CHECK: Intrinsic has incorrect argument type!
+define <16 x i32> @gather9(<16 x i32*> %ptrs, <16 x i1> %mask, <8 x i32> %passthru) {
+  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <8 x i32> %passthru)
+  ret <16 x i32> %res
+}
+declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <8 x i32>)
+
+; Mask is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter2(<16 x float> %value, <16 x float*> %ptrs, <16 x i1>* %mask) {
+  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %value, <16 x float*> %ptrs, i32 4, <16 x i1>* %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float>, <16 x float*>, i32, <16 x i1>*)
+
+; Mask length != value length
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter3(<8 x float> %value, <8 x float*> %ptrs, <16 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float> %value, <8 x float*> %ptrs, i32 4, <16 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v8p0f32(<8 x float>, <8 x float*>, i32, <16 x i1>)
+
+; Value type is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter4(<8 x float>* %value, <8 x float*> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>* %value, <8 x float*> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.p0v8f32.v8p0f32(<8 x float>*, <8 x float*>, i32, <8 x i1>)
+
+; ptrs is not a vector
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter5(<8 x float> %value, <8 x float*>* %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float> %value, <8 x float*>* %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.p0v8p0f32(<8 x float>, <8 x float*>*, i32, <8 x i1>)
+
+; Value type is not a vector of pointers
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter6(<8 x float> %value, <8 x float> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v8f32(<8 x float> %value, <8 x float> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v8f32(<8 x float>, <8 x float>, i32, <8 x i1>)
+
+; Value element type != vector of pointers element
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter7(<8 x float> %value, <8 x double*> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float> %value, <8 x double*> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v8p0f64(<8 x float>, <8 x double*>, i32, <8 x i1>)
+
+; Value length!= vector of pointers length
+; CHECK: Intrinsic has incorrect argument type!
+define void @scatter8(<8 x float> %value, <16 x float*> %ptrs, <8 x i1> %mask) {
+  call void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float> %value, <16 x float*> %ptrs, i32 4, <8 x i1> %mask)
+  ret void
+}
+declare void @llvm.masked.scatter.v8f32.v16p0f32(<8 x float>, <16 x float*>, i32, <8 x i1>)
+

Modified: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp?rev=302018&r1=302017&r2=302018&view=diff
==============================================================================
--- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp (original)
+++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp Wed May  3 07:28:54 2017
@@ -211,13 +211,12 @@ enum IIT_Info {
   IIT_SAME_VEC_WIDTH_ARG = 31,
   IIT_PTR_TO_ARG = 32,
   IIT_PTR_TO_ELT = 33,
-  IIT_VEC_OF_PTRS_TO_ELT = 34,
+  IIT_VEC_OF_ANYPTRS_TO_ELT = 34,
   IIT_I128 = 35,
   IIT_V512 = 36,
   IIT_V1024 = 37
 };
 
-
 static void EncodeFixedValueType(MVT::SimpleValueType VT,
                                  std::vector<unsigned char> &Sig) {
   if (MVT(VT).isInteger()) {
@@ -273,9 +272,16 @@ static void EncodeFixedType(Record *R, s
     }
     else if (R->isSubClassOf("LLVMPointerTo"))
       Sig.push_back(IIT_PTR_TO_ARG);
-    else if (R->isSubClassOf("LLVMVectorOfPointersToElt"))
-      Sig.push_back(IIT_VEC_OF_PTRS_TO_ELT);
-    else if (R->isSubClassOf("LLVMPointerToElt"))
+    else if (R->isSubClassOf("LLVMVectorOfAnyPointersToElt")) {
+      Sig.push_back(IIT_VEC_OF_ANYPTRS_TO_ELT);
+      unsigned ArgNo = ArgCodes.size();
+      ArgCodes.push_back(3 /*vAny*/);
+      // Encode overloaded ArgNo
+      Sig.push_back(ArgNo);
+      // Encode LLVMMatchType<Number> ArgNo
+      Sig.push_back(Number);
+      return;
+    } else if (R->isSubClassOf("LLVMPointerToElt"))
       Sig.push_back(IIT_PTR_TO_ELT);
     else
       Sig.push_back(IIT_ARG);