[llvm] r351957 - [IR] Match intrinsic parameter by scalar/vectorwidth

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 23 23:49:39 PST 2019


Update LangRef so it doesn't say the second type must be an i1?

~Craig


On Wed, Jan 23, 2019 at 8:00 AM Simon Pilgrim via llvm-commits <llvm-commits at lists.llvm.org> wrote:

> Author: rksimon
> Date: Wed Jan 23 08:00:22 2019
> New Revision: 351957
>
> URL: http://llvm.org/viewvc/llvm-project?rev=351957&view=rev
> Log:
> [IR] Match intrinsic parameter by scalar/vectorwidth
>
> This patch replaces the existing LLVMVectorSameWidth matcher with
> LLVMScalarOrSameVectorWidth.
>
> The matched args must either both be scalars or both be vectors with the
> same number of elements; in either case the scalar/element type can
> differ, as specified by LLVMScalarOrSameVectorWidth.
>
> I've updated the _overflow intrinsics to demonstrate this, allowing them
> to return an i1 or <N x i1> overflow result that matches the scalar/vector
> width of the other (add/sub/mul) result type.
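>
> For example, with this change both of the following declarations (taken
> from the new cost model test below) resolve against the one overloaded
> definition:
>
>   declare {i32, i1}             @llvm.sadd.with.overflow.i32(i32, i32)
>   declare {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)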
>
> The masked load/store/gather/scatter intrinsics have also been updated to
> use this, although since we specify the reference type as llvm_anyvector_ty
> the mask is still guaranteed to be <N x i1>, so there is no change in
> behaviour.
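>
> For instance, a typical masked load declaration keeps its existing shape,
> with the mask's element count matching the loaded vector (a sketch using
> the usual overload mangling):
>
>   declare <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>*, i32, <4 x i1>, <4 x i32>)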
>
> Differential Revision: https://reviews.llvm.org/D57090
>
> Added:
>     llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll
> Modified:
>     llvm/trunk/include/llvm/IR/Intrinsics.td
>     llvm/trunk/lib/IR/Function.cpp
>     llvm/trunk/utils/TableGen/CodeGenTarget.cpp
>     llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
>
> Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
> +++ llvm/trunk/include/llvm/IR/Intrinsics.td Wed Jan 23 08:00:22 2019
> @@ -156,10 +156,15 @@ class LLVMMatchType<int num>
>  // the intrinsic is overloaded, so the matched type should be declared as iAny.
>  class LLVMExtendedType<int num> : LLVMMatchType<num>;
>  class LLVMTruncatedType<int num> : LLVMMatchType<num>;
> -class LLVMVectorSameWidth<int num, LLVMType elty>
> -  : LLVMMatchType<num> {
> +
> +// Match the scalar/vector of another intrinsic parameter but with a different
> +// element type. Either both are scalars or both are vectors with the same
> +// number of elements.
> +class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
> +  : LLVMMatchType<idx> {
>    ValueType ElTy = elty.VT;
>  }
> +
>  class LLVMPointerTo<int num> : LLVMMatchType<num>;
>  class LLVMPointerToElt<int num> : LLVMMatchType<num>;
>  class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
> @@ -796,24 +801,30 @@ def int_adjust_trampoline : Intrinsic<[l
>  //
>
>  // Expose the carry flag from add operations on two integrals.
> -def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> -def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
>
> -def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> -def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
>
> -def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> -def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
>
> @@ -1001,35 +1012,35 @@ def int_is_constant : Intrinsic<[llvm_i1
>  def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
>                                        LLVMAnyPointerType<LLVMMatchType<0>>,
>                                        llvm_i32_ty,
> -                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
> +                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                   [IntrArgMemOnly]>;
>
>  def int_masked_load  : Intrinsic<[llvm_anyvector_ty],
>                                   [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
> -                                  LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
> +                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
>                                   [IntrReadMem, IntrArgMemOnly]>;
>
>  def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
>                                   [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
> -                                  LLVMVectorSameWidth<0, llvm_i1_ty>,
> +                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
>                                    LLVMMatchType<0>],
>                                   [IntrReadMem]>;
>
>  def int_masked_scatter: Intrinsic<[],
>                                    [llvm_anyvector_ty,
>                                    LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
> -                                   LLVMVectorSameWidth<0, llvm_i1_ty>]>;
> +                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]>;
>
>  def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
>                                       [LLVMPointerToElt<0>,
> -                                      LLVMVectorSameWidth<0, llvm_i1_ty>,
> +                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
>                                        LLVMMatchType<0>],
>                                       [IntrReadMem]>;
>
>  def int_masked_compressstore: Intrinsic<[],
>                                       [llvm_anyvector_ty,
>                                        LLVMPointerToElt<0>,
> -                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
> +                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                       [IntrArgMemOnly]>;
>
>  // Test whether a pointer is associated with a type metadata identifier.
>
> Modified: llvm/trunk/lib/IR/Function.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Function.cpp?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/IR/Function.cpp (original)
> +++ llvm/trunk/lib/IR/Function.cpp Wed Jan 23 08:00:22 2019
> @@ -948,10 +948,9 @@ static Type *DecodeFixedType(ArrayRef<In
>    case IITDescriptor::SameVecWidthArgument: {
>      Type *EltTy = DecodeFixedType(Infos, Tys, Context);
>      Type *Ty = Tys[D.getArgumentNumber()];
> -    if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
> +    if (auto *VTy = dyn_cast<VectorType>(Ty))
>        return VectorType::get(EltTy, VTy->getNumElements());
> -    }
> -    llvm_unreachable("unhandled");
> +    return EltTy;
>    }
>    case IITDescriptor::PtrToArgument: {
>      Type *Ty = Tys[D.getArgumentNumber()];
> @@ -1135,15 +1134,19 @@ bool Intrinsic::matchIntrinsicType(Type
>      case IITDescriptor::SameVecWidthArgument: {
>        if (D.getArgumentNumber() >= ArgTys.size())
>          return true;
> -      VectorType * ReferenceType =
> -        dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
> -      VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
> -      if (!ThisArgType || !ReferenceType ||
> -          (ReferenceType->getVectorNumElements() !=
> -           ThisArgType->getVectorNumElements()))
> -        return true;
> -      return matchIntrinsicType(ThisArgType->getVectorElementType(),
> -                                Infos, ArgTys);
> +      auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
> +      auto *ThisArgType = dyn_cast<VectorType>(Ty);
> +      // Both must be vectors of the same number of elements or neither.
> +      if ((ReferenceType != nullptr) != (ThisArgType != nullptr))
> +        return true;
> +      Type *EltTy = Ty;
> +      if (ThisArgType) {
> +        if (ReferenceType->getVectorNumElements() !=
> +            ThisArgType->getVectorNumElements())
> +          return true;
> +        EltTy = ThisArgType->getVectorElementType();
> +      }
> +      return matchIntrinsicType(EltTy, Infos, ArgTys);
>      }
>      case IITDescriptor::PtrToArgument: {
>        if (D.getArgumentNumber() >= ArgTys.size())
>
> Added: llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll?rev=351957&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll (added)
> +++ llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll Wed Jan 23 08:00:22 2019
> @@ -0,0 +1,414 @@
> +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
> +;
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
> +
> +;
> +; sadd.with.overflow
> +;
> +
> +declare {i64, i1}              @llvm.sadd.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>}  @llvm.sadd.with.overflow.v2i64(<2 x i64>,
> <2 x i64>)
> +declare {<4 x i64>, <4 x i1>}  @llvm.sadd.with.overflow.v4i64(<4 x i64>,
> <4 x i64>)
> +declare {<8 x i64>, <8 x i1>}  @llvm.sadd.with.overflow.v8i64(<8 x i64>,
> <8 x i64>)
> +
> +declare {i32, i1}               @llvm.sadd.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>}   @llvm.sadd.with.overflow.v4i32(<4 x i32>,
> <4 x i32>)
> +declare {<8 x i32>, <8 x i1>}   @llvm.sadd.with.overflow.v8i32(<8 x i32>,
> <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x
> i32>, <16 x i32>)
> +
> +declare {i16, i1}               @llvm.sadd.with.overflow.i16(i16, i16)
> +declare {<8 x i16>,  <8 x i1>}  @llvm.sadd.with.overflow.v8i16(<8 x i16>,
> <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x
> i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x
> i16>, <32 x i16>)
> +
> +declare {i8, i1}                @llvm.sadd.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>}  @llvm.sadd.with.overflow.v16i8(<16 x i8>,
> <16 x i8>)
> +declare {<32 x i8>, <32 x i1>}  @llvm.sadd.with.overflow.v32i8(<32 x i8>,
> <32 x i8>)
> +declare {<64 x i8>, <64 x i1>}  @llvm.sadd.with.overflow.v64i8(<64 x i8>,
> <64 x i8>)
> +
> +define i32 @sadd(i32 %arg) {
> +; CHECK-LABEL: 'sadd'
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x
> i32> undef, <4 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x
> i32> undef, <8 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I32 = call { <16 x i32>, <16 x i1> }
> @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x
> i16> undef, <8 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I16 = call { <16 x i16>, <16 x i1> }
> @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I16 = call { <32 x i16>, <32 x i1> }
> @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x
> i8> undef, <16 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x
> i8> undef, <32 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 191 for
> instruction: %V64I8 = call { <64 x i8>, <64 x i1> }
> @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.sadd.with.overflow.v4i32(<4
> x i32> undef, <4 x i32> undef)
> +  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.sadd.with.overflow.v8i32(<8
> x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>}
> @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.sadd.with.overflow.v8i16(<8
> x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>}
> @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>}
> @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16
> x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32
> x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64
> x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; uadd.with.overflow
> +;
> +
> +declare {i64, i1}              @llvm.uadd.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>}  @llvm.uadd.with.overflow.v2i64(<2 x i64>,
> <2 x i64>)
> +declare {<4 x i64>, <4 x i1>}  @llvm.uadd.with.overflow.v4i64(<4 x i64>,
> <4 x i64>)
> +declare {<8 x i64>, <8 x i1>}  @llvm.uadd.with.overflow.v8i64(<8 x i64>,
> <8 x i64>)
> +
> +declare {i32, i1}               @llvm.uadd.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>}   @llvm.uadd.with.overflow.v4i32(<4 x i32>,
> <4 x i32>)
> +declare {<8 x i32>, <8 x i1>}   @llvm.uadd.with.overflow.v8i32(<8 x i32>,
> <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x
> i32>, <16 x i32>)
> +
> +declare {i16, i1}               @llvm.uadd.with.overflow.i16(i16, i16)
> +declare {<8 x i16>,  <8 x i1>}  @llvm.uadd.with.overflow.v8i16(<8 x i16>,
> <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x
> i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x
> i16>, <32 x i16>)
> +
> +declare {i8, i1}                @llvm.uadd.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>}  @llvm.uadd.with.overflow.v16i8(<16 x i8>,
> <16 x i8>)
> +declare {<32 x i8>, <32 x i1>}  @llvm.uadd.with.overflow.v32i8(<32 x i8>,
> <32 x i8>)
> +declare {<64 x i8>, <64 x i1>}  @llvm.uadd.with.overflow.v64i8(<64 x i8>,
> <64 x i8>)
> +
> +define i32 @uadd(i32 %arg) {
> +; CHECK-LABEL: 'uadd'
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x
> i32> undef, <4 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x
> i32> undef, <8 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I32 = call { <16 x i32>, <16 x i1> }
> @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x
> i16> undef, <8 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I16 = call { <16 x i16>, <16 x i1> }
> @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I16 = call { <32 x i16>, <32 x i1> }
> @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x
> i8> undef, <16 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x
> i8> undef, <32 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 191 for
> instruction: %V64I8 = call { <64 x i8>, <64 x i1> }
> @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.uadd.with.overflow.v4i32(<4
> x i32> undef, <4 x i32> undef)
> +  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.uadd.with.overflow.v8i32(<8
> x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>}
> @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.uadd.with.overflow.v8i16(<8
> x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>}
> @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>}
> @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16
> x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32
> x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64
> x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; ssub.with.overflow
> +;
> +
> +declare {i64, i1}              @llvm.ssub.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>}  @llvm.ssub.with.overflow.v2i64(<2 x i64>,
> <2 x i64>)
> +declare {<4 x i64>, <4 x i1>}  @llvm.ssub.with.overflow.v4i64(<4 x i64>,
> <4 x i64>)
> +declare {<8 x i64>, <8 x i1>}  @llvm.ssub.with.overflow.v8i64(<8 x i64>,
> <8 x i64>)
> +
> +declare {i32, i1}               @llvm.ssub.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>}   @llvm.ssub.with.overflow.v4i32(<4 x i32>,
> <4 x i32>)
> +declare {<8 x i32>, <8 x i1>}   @llvm.ssub.with.overflow.v8i32(<8 x i32>,
> <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x
> i32>, <16 x i32>)
> +
> +declare {i16, i1}               @llvm.ssub.with.overflow.i16(i16, i16)
> +declare {<8 x i16>,  <8 x i1>}  @llvm.ssub.with.overflow.v8i16(<8 x i16>,
> <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x
> i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x
> i16>, <32 x i16>)
> +
> +declare {i8, i1}                @llvm.ssub.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>}  @llvm.ssub.with.overflow.v16i8(<16 x i8>,
> <16 x i8>)
> +declare {<32 x i8>, <32 x i1>}  @llvm.ssub.with.overflow.v32i8(<32 x i8>,
> <32 x i8>)
> +declare {<64 x i8>, <64 x i1>}  @llvm.ssub.with.overflow.v64i8(<64 x i8>,
> <64 x i8>)
> +
> +define i32 @ssub(i32 %arg) {
> +; CHECK-LABEL: 'ssub'
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x
> i32> undef, <4 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x
> i32> undef, <8 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I32 = call { <16 x i32>, <16 x i1> }
> @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x
> i16> undef, <8 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I16 = call { <16 x i16>, <16 x i1> }
> @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I16 = call { <32 x i16>, <32 x i1> }
> @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x
> i8> undef, <16 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x
> i8> undef, <32 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 191 for
> instruction: %V64I8 = call { <64 x i8>, <64 x i1> }
> @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.ssub.with.overflow.v4i32(<4
> x i32> undef, <4 x i32> undef)
> +  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.ssub.with.overflow.v8i32(<8
> x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>}
> @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.ssub.with.overflow.v8i16(<8
> x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>}
> @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>}
> @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16
> x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32
> x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64
> x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; usub.with.overflow
> +;
> +
> +declare {i64, i1}              @llvm.usub.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>}  @llvm.usub.with.overflow.v2i64(<2 x i64>,
> <2 x i64>)
> +declare {<4 x i64>, <4 x i1>}  @llvm.usub.with.overflow.v4i64(<4 x i64>,
> <4 x i64>)
> +declare {<8 x i64>, <8 x i1>}  @llvm.usub.with.overflow.v8i64(<8 x i64>,
> <8 x i64>)
> +
> +declare {i32, i1}               @llvm.usub.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>}   @llvm.usub.with.overflow.v4i32(<4 x i32>,
> <4 x i32>)
> +declare {<8 x i32>, <8 x i1>}   @llvm.usub.with.overflow.v8i32(<8 x i32>,
> <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x
> i32>, <16 x i32>)
> +
> +declare {i16, i1}               @llvm.usub.with.overflow.i16(i16, i16)
> +declare {<8 x i16>,  <8 x i1>}  @llvm.usub.with.overflow.v8i16(<8 x i16>,
> <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x
> i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x
> i16>, <32 x i16>)
> +
> +declare {i8, i1}                @llvm.usub.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>}  @llvm.usub.with.overflow.v16i8(<16 x i8>,
> <16 x i8>)
> +declare {<32 x i8>, <32 x i1>}  @llvm.usub.with.overflow.v32i8(<32 x i8>,
> <32 x i8>)
> +declare {<64 x i8>, <64 x i1>}  @llvm.usub.with.overflow.v64i8(<64 x i8>,
> <64 x i8>)
> +
> +define i32 @usub(i32 %arg) {
> +; CHECK-LABEL: 'usub'
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x
> i32> undef, <4 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x
> i32> undef, <8 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I32 = call { <16 x i32>, <16 x i1> }
> @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x
> i16> undef, <8 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I16 = call { <16 x i16>, <16 x i1> }
> @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I16 = call { <32 x i16>, <32 x i1> }
> @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x
> i8> undef, <16 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x
> i8> undef, <32 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 191 for
> instruction: %V64I8 = call { <64 x i8>, <64 x i1> }
> @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.usub.with.overflow.v4i32(<4
> x i32> undef, <4 x i32> undef)
> +  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.usub.with.overflow.v8i32(<8
> x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>}
> @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.usub.with.overflow.v8i16(<8
> x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>}
> @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>}
> @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16
> x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32
> x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64
> x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; smul.with.overflow
> +;
> +
> +declare {i64, i1}              @llvm.smul.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>}  @llvm.smul.with.overflow.v2i64(<2 x i64>,
> <2 x i64>)
> +declare {<4 x i64>, <4 x i1>}  @llvm.smul.with.overflow.v4i64(<4 x i64>,
> <4 x i64>)
> +declare {<8 x i64>, <8 x i1>}  @llvm.smul.with.overflow.v8i64(<8 x i64>,
> <8 x i64>)
> +
> +declare {i32, i1}               @llvm.smul.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>}   @llvm.smul.with.overflow.v4i32(<4 x i32>,
> <4 x i32>)
> +declare {<8 x i32>, <8 x i1>}   @llvm.smul.with.overflow.v8i32(<8 x i32>,
> <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x
> i32>, <16 x i32>)
> +
> +declare {i16, i1}               @llvm.smul.with.overflow.i16(i16, i16)
> +declare {<8 x i16>,  <8 x i1>}  @llvm.smul.with.overflow.v8i16(<8 x i16>,
> <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x
> i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x
> i16>, <32 x i16>)
> +
> +declare {i8, i1}                @llvm.smul.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>}  @llvm.smul.with.overflow.v16i8(<16 x i8>,
> <16 x i8>)
> +declare {<32 x i8>, <32 x i1>}  @llvm.smul.with.overflow.v32i8(<32 x i8>,
> <32 x i8>)
> +declare {<64 x i8>, <64 x i1>}  @llvm.smul.with.overflow.v64i8(<64 x i8>,
> <64 x i8>)
> +
> +define i32 @smul(i32 %arg) {
> +; CHECK-LABEL: 'smul'
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x
> i32> undef, <4 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x
> i32> undef, <8 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I32 = call { <16 x i32>, <16 x i1> }
> @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x
> i16> undef, <8 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I16 = call { <16 x i16>, <16 x i1> }
> @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I16 = call { <32 x i16>, <32 x i1> }
> @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x
> i8> undef, <16 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x
> i8> undef, <32 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 191 for
> instruction: %V64I8 = call { <64 x i8>, <64 x i1> }
> @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.smul.with.overflow.v4i32(<4
> x i32> undef, <4 x i32> undef)
> +  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.smul.with.overflow.v8i32(<8
> x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>}
> @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.smul.with.overflow.v8i16(<8
> x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>}
> @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>}
> @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16
> x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32
> x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64
> x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; umul.with.overflow
> +;
> +
> +declare {i64, i1}              @llvm.umul.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>}  @llvm.umul.with.overflow.v2i64(<2 x i64>,
> <2 x i64>)
> +declare {<4 x i64>, <4 x i1>}  @llvm.umul.with.overflow.v4i64(<4 x i64>,
> <4 x i64>)
> +declare {<8 x i64>, <8 x i1>}  @llvm.umul.with.overflow.v8i64(<8 x i64>,
> <8 x i64>)
> +
> +declare {i32, i1}               @llvm.umul.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>}   @llvm.umul.with.overflow.v4i32(<4 x i32>,
> <4 x i32>)
> +declare {<8 x i32>, <8 x i1>}   @llvm.umul.with.overflow.v8i32(<8 x i32>,
> <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x
> i32>, <16 x i32>)
> +
> +declare {i16, i1}               @llvm.umul.with.overflow.i16(i16, i16)
> +declare {<8 x i16>,  <8 x i1>}  @llvm.umul.with.overflow.v8i16(<8 x i16>,
> <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x
> i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x
> i16>, <32 x i16>)
> +
> +declare {i8, i1}                @llvm.umul.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>}  @llvm.umul.with.overflow.v16i8(<16 x i8>,
> <16 x i8>)
> +declare {<32 x i8>, <32 x i1>}  @llvm.umul.with.overflow.v32i8(<32 x i8>,
> <32 x i8>)
> +declare {<64 x i8>, <64 x i1>}  @llvm.umul.with.overflow.v64i8(<64 x i8>,
> <64 x i8>)
> +
> +define i32 @umul(i32 %arg) {
> +; CHECK-LABEL: 'umul'
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 5 for instruction:
> %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 11 for instruction:
> %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x
> i32> undef, <4 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x
> i32> undef, <8 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I32 = call { <16 x i32>, <16 x i1> }
> @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 23 for instruction:
> %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x
> i16> undef, <8 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I16 = call { <16 x i16>, <16 x i1> }
> @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I16 = call { <32 x i16>, <32 x i1> }
> @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 1 for instruction:
> %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 47 for instruction:
> %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x
> i8> undef, <16 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 95 for instruction:
> %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x
> i8> undef, <32 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 191 for
> instruction: %V64I8 = call { <64 x i8>, <64 x i1> }
> @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT:  Cost Model: Found an estimated cost of 0 for instruction:
> ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x
> i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x
> i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x
> i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32  = call {<4 x i32>, <4 x i1>}  @llvm.umul.with.overflow.v4i32(<4
> x i32> undef, <4 x i32> undef)
> +  %V8I32  = call {<8 x i32>, <8 x i1>}  @llvm.umul.with.overflow.v8i32(<8
> x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>}
> @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16  = call {<8 x i16>, <8 x i1>}  @llvm.umul.with.overflow.v8i16(<8
> x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>}
> @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>}
> @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16
> x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32
> x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64
> x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
>
> Modified: llvm/trunk/utils/TableGen/CodeGenTarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.cpp?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/CodeGenTarget.cpp (original)
> +++ llvm/trunk/utils/TableGen/CodeGenTarget.cpp Wed Jan 23 08:00:22 2019
> @@ -633,7 +633,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Recor
>        // overloaded, all the types can be specified directly.
>        assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
>                 !TyEl->isSubClassOf("LLVMTruncatedType") &&
> -               !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
> +               !TyEl->isSubClassOf("LLVMScalarOrSameVectorWidth")) ||
>                VT == MVT::iAny || VT == MVT::vAny) &&
>               "Expected iAny or vAny type");
>      } else
>
> Modified: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp (original)
> +++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp Wed Jan 23 08:00:22 2019
> @@ -269,7 +269,7 @@ static void EncodeFixedType(Record *R, s
>        Sig.push_back(IIT_TRUNC_ARG);
>      else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
>        Sig.push_back(IIT_HALF_VEC_ARG);
> -    else if (R->isSubClassOf("LLVMVectorSameWidth")) {
> +    else if (R->isSubClassOf("LLVMScalarOrSameVectorWidth")) {
>        Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
>        Sig.push_back((Number << 3) | ArgCodes[Number]);
>        MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
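>
> (A note on the encoding above, for readers of the IIT table format: the
> second byte pushed packs the matched argument's index and its 3-bit
> overload-kind code as (Number << 3) | ArgCodes[Number]; only the TableGen
> class name changes here, the emitted encoding is identical to what
> LLVMVectorSameWidth produced.)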
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>