[llvm] r351957 - [IR] Match intrinsic parameter by scalar/vectorwidth
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 23 23:49:39 PST 2019
Update LangRef so it doesn't say the second type must be an i1?
~Craig
On Wed, Jan 23, 2019 at 8:00 AM Simon Pilgrim via llvm-commits <
llvm-commits at lists.llvm.org> wrote:
> Author: rksimon
> Date: Wed Jan 23 08:00:22 2019
> New Revision: 351957
>
> URL: http://llvm.org/viewvc/llvm-project?rev=351957&view=rev
> Log:
> [IR] Match intrinsic parameter by scalar/vectorwidth
>
> This patch replaces the existing LLVMVectorSameWidth matcher with
> LLVMScalarOrSameVectorWidth.
>
> The matching args must either both be scalars or both be vectors with the
> same number of elements; in either case the scalar/element type can differ
> and is specified by LLVMScalarOrSameVectorWidth.
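> 
> For illustration (not part of the patch itself), both of the following
> instantiations are now matched by a single overloaded definition - a
> scalar reference type pairs with a scalar i1, and a <4 x i32> reference
> pairs with a <4 x i1>:
> 
>   ; Scalar and vector instantiations of the same overloaded intrinsic.
>   declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
>   declare {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)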
>
> I've updated the _overflow intrinsics to demonstrate this - allowing them
> to return an i1 or <N x i1> overflow result, matching the scalar/vector
> width of the first (add/sub/mul) result type.
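> 
> A vector use then looks like this (illustrative IR; %a and %b are assumed
> <4 x i32> values):
> 
>   %res = call {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32> %a, <4 x i32> %b)
>   %sum = extractvalue {<4 x i32>, <4 x i1>} %res, 0 ; add result
>   %ovf = extractvalue {<4 x i32>, <4 x i1>} %res, 1 ; per-lane overflow flags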
>
> The masked load/store/gather/scatter intrinsics have also been updated to
> use this, although since we specify the reference type to be
> llvm_anyvector_ty we guarantee the mask will be <N x i1>, so there is no
> change in behaviour.
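> 
> For reference, the mask stays in the same position with the same <N x i1>
> type, e.g. (illustrative declaration using the existing overload naming):
> 
>   ; 3rd parameter is the <8 x i1> mask; the last is the passthru value.
>   declare <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>*, i32, <8 x i1>, <8 x float>)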
>
> Differential Revision: https://reviews.llvm.org/D57090
>
> Added:
>     llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll
> Modified:
>     llvm/trunk/include/llvm/IR/Intrinsics.td
>     llvm/trunk/lib/IR/Function.cpp
>     llvm/trunk/utils/TableGen/CodeGenTarget.cpp
>     llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
>
> Modified: llvm/trunk/include/llvm/IR/Intrinsics.td
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Intrinsics.td?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/include/llvm/IR/Intrinsics.td (original)
> +++ llvm/trunk/include/llvm/IR/Intrinsics.td Wed Jan 23 08:00:22 2019
> @@ -156,10 +156,15 @@ class LLVMMatchType<int num>
>  // the intrinsic is overloaded, so the matched type should be declared as iAny.
>  class LLVMExtendedType<int num> : LLVMMatchType<num>;
>  class LLVMTruncatedType<int num> : LLVMMatchType<num>;
> -class LLVMVectorSameWidth<int num, LLVMType elty>
> -  : LLVMMatchType<num> {
> +
> +// Match the scalar/vector of another intrinsic parameter but with a different
> +// element type. Either both are scalars or both are vectors with the same
> +// number of elements.
> +class LLVMScalarOrSameVectorWidth<int idx, LLVMType elty>
> +  : LLVMMatchType<idx> {
>    ValueType ElTy = elty.VT;
>  }
> +
>  class LLVMPointerTo<int num> : LLVMMatchType<num>;
>  class LLVMPointerToElt<int num> : LLVMMatchType<num>;
>  class LLVMVectorOfAnyPointersToElt<int num> : LLVMMatchType<num>;
> @@ -796,24 +801,30 @@ def int_adjust_trampoline : Intrinsic<[l
> //
>
>  // Expose the carry flag from add operations on two integrals.
> -def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_sadd_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> -def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_uadd_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> 
> -def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_ssub_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> -def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_usub_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> 
> -def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_smul_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
> -def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty, llvm_i1_ty],
> +def int_umul_with_overflow : Intrinsic<[llvm_anyint_ty,
> +                                        LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                         [LLVMMatchType<0>, LLVMMatchType<0>],
>                                         [IntrNoMem, IntrSpeculatable]>;
>
> @@ -1001,35 +1012,35 @@ def int_is_constant : Intrinsic<[llvm_i1
>  def int_masked_store : Intrinsic<[], [llvm_anyvector_ty,
>                                        LLVMAnyPointerType<LLVMMatchType<0>>,
>                                        llvm_i32_ty,
> -                                      LLVMVectorSameWidth<0, llvm_i1_ty>],
> +                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                   [IntrArgMemOnly]>;
> 
>  def int_masked_load : Intrinsic<[llvm_anyvector_ty],
>                                  [LLVMAnyPointerType<LLVMMatchType<0>>, llvm_i32_ty,
> -                                 LLVMVectorSameWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
> +                                 LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>],
>                                  [IntrReadMem, IntrArgMemOnly]>;
> 
>  def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
>                                   [LLVMVectorOfAnyPointersToElt<0>, llvm_i32_ty,
> -                                  LLVMVectorSameWidth<0, llvm_i1_ty>,
> +                                  LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
>                                    LLVMMatchType<0>],
>                                   [IntrReadMem]>;
> 
>  def int_masked_scatter: Intrinsic<[],
>                                    [llvm_anyvector_ty,
>                                     LLVMVectorOfAnyPointersToElt<0>,
>                                     llvm_i32_ty,
> -                                   LLVMVectorSameWidth<0, llvm_i1_ty>]>;
> +                                   LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>]>;
> 
>  def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
>                                       [LLVMPointerToElt<0>,
> -                                      LLVMVectorSameWidth<0, llvm_i1_ty>,
> +                                      LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
>                                        LLVMMatchType<0>],
>                                       [IntrReadMem]>;
> 
>  def int_masked_compressstore: Intrinsic<[],
>                                          [llvm_anyvector_ty,
>                                           LLVMPointerToElt<0>,
> -                                         LLVMVectorSameWidth<0, llvm_i1_ty>],
> +                                         LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
>                                          [IntrArgMemOnly]>;
>
> // Test whether a pointer is associated with a type metadata identifier.
>
> Modified: llvm/trunk/lib/IR/Function.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Function.cpp?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/IR/Function.cpp (original)
> +++ llvm/trunk/lib/IR/Function.cpp Wed Jan 23 08:00:22 2019
> @@ -948,10 +948,9 @@ static Type *DecodeFixedType(ArrayRef<In
>   case IITDescriptor::SameVecWidthArgument: {
>     Type *EltTy = DecodeFixedType(Infos, Tys, Context);
>     Type *Ty = Tys[D.getArgumentNumber()];
> -    if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
> +    if (auto *VTy = dyn_cast<VectorType>(Ty))
>       return VectorType::get(EltTy, VTy->getNumElements());
> -    }
> -    llvm_unreachable("unhandled");
> +    return EltTy;
>   }
>   case IITDescriptor::PtrToArgument: {
>     Type *Ty = Tys[D.getArgumentNumber()];
> @@ -1135,15 +1134,19 @@ bool Intrinsic::matchIntrinsicType(Type
>     case IITDescriptor::SameVecWidthArgument: {
>       if (D.getArgumentNumber() >= ArgTys.size())
>         return true;
> -      VectorType * ReferenceType =
> -        dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
> -      VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
> -      if (!ThisArgType || !ReferenceType ||
> -          (ReferenceType->getVectorNumElements() !=
> -           ThisArgType->getVectorNumElements()))
> -        return true;
> -      return matchIntrinsicType(ThisArgType->getVectorElementType(),
> -                                Infos, ArgTys);
> +      auto *ReferenceType = dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
> +      auto *ThisArgType = dyn_cast<VectorType>(Ty);
> +      // Both must be vectors of the same number of elements or neither.
> +      if ((ReferenceType != nullptr) != (ThisArgType != nullptr))
> +        return true;
> +      Type *EltTy = Ty;
> +      if (ThisArgType) {
> +        if (ReferenceType->getVectorNumElements() !=
> +            ThisArgType->getVectorNumElements())
> +          return true;
> +        EltTy = ThisArgType->getVectorElementType();
> +      }
> +      return matchIntrinsicType(EltTy, Infos, ArgTys);
>     }
>     case IITDescriptor::PtrToArgument: {
>       if (D.getArgumentNumber() >= ArgTys.size())
>
> Added: llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll?rev=351957&view=auto
>
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll (added)
> +++ llvm/trunk/test/Analysis/CostModel/X86/arith-overflow.ll Wed Jan 23 08:00:22 2019
> @@ -0,0 +1,414 @@
> +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,SSE,SSSE3
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,SSE,SSE42
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512F
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512BW
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefixes=CHECK,AVX512,AVX512DQ
> +;
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=CHECK,SLM
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=CHECK,GLM
> +; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=CHECK,BTVER2
> +
> +;
> +; sadd.with.overflow
> +;
> +
> +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
> +declare {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64>, <4 x i64>)
> +declare {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64>, <8 x i64>)
> +
> +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
> +declare {<8 x i32>, <8 x i1>} @llvm.sadd.with.overflow.v8i32(<8 x i32>, <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32>, <16 x i32>)
> +
> +declare {i16, i1} @llvm.sadd.with.overflow.i16(i16, i16)
> +declare {<8 x i16>, <8 x i1>} @llvm.sadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16>, <32 x i16>)
> +
> +declare {i8, i1} @llvm.sadd.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8>, <16 x i8>)
> +declare {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8>, <32 x i8>)
> +declare {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
> +
> +define i32 @sadd(i32 %arg) {
> +; CHECK-LABEL: 'sadd'
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.sadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.sadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.sadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.sadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +  %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.sadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.sadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.sadd.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.sadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.sadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.sadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.sadd.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.sadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.sadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.sadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; uadd.with.overflow
> +;
> +
> +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64>, <2 x i64>)
> +declare {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64>, <4 x i64>)
> +declare {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64>, <8 x i64>)
> +
> +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32>, <4 x i32>)
> +declare {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32>, <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32>, <16 x i32>)
> +
> +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16)
> +declare {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16>, <32 x i16>)
> +
> +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8>, <16 x i8>)
> +declare {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8>, <32 x i8>)
> +declare {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8>, <64 x i8>)
> +
> +define i32 @uadd(i32 %arg) {
> +; CHECK-LABEL: 'uadd'
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.uadd.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.uadd.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.uadd.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.uadd.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +  %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.uadd.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.uadd.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.uadd.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.uadd.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.uadd.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.uadd.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; ssub.with.overflow
> +;
> +
> +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
> +declare {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64>, <4 x i64>)
> +declare {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64>, <8 x i64>)
> +
> +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32>, <4 x i32>)
> +declare {<8 x i32>, <8 x i1>} @llvm.ssub.with.overflow.v8i32(<8 x i32>, <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32>, <16 x i32>)
> +
> +declare {i16, i1} @llvm.ssub.with.overflow.i16(i16, i16)
> +declare {<8 x i16>, <8 x i1>} @llvm.ssub.with.overflow.v8i16(<8 x i16>, <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16>, <32 x i16>)
> +
> +declare {i8, i1} @llvm.ssub.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8>, <16 x i8>)
> +declare {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8>, <32 x i8>)
> +declare {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
> +
> +define i32 @ssub(i32 %arg) {
> +; CHECK-LABEL: 'ssub'
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.ssub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.ssub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.ssub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.ssub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +  %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.ssub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.ssub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.ssub.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.ssub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.ssub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.ssub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.ssub.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.ssub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.ssub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.ssub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; usub.with.overflow
> +;
> +
> +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64>, <2 x i64>)
> +declare {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64>, <4 x i64>)
> +declare {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64>, <8 x i64>)
> +
> +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32>, <4 x i32>)
> +declare {<8 x i32>, <8 x i1>} @llvm.usub.with.overflow.v8i32(<8 x i32>, <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32>, <16 x i32>)
> +
> +declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16)
> +declare {<8 x i16>, <8 x i1>} @llvm.usub.with.overflow.v8i16(<8 x i16>, <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16>, <32 x i16>)
> +
> +declare {i8, i1} @llvm.usub.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8>, <16 x i8>)
> +declare {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8>, <32 x i8>)
> +declare {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8>, <64 x i8>)
> +
> +define i32 @usub(i32 %arg) {
> +; CHECK-LABEL: 'usub'
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.usub.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.usub.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.usub.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.usub.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +  %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.usub.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.usub.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.usub.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.usub.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.usub.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.usub.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.usub.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.usub.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.usub.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.usub.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; smul.with.overflow
> +;
> +
> +declare {i64, i1} @llvm.smul.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64>, <2 x i64>)
> +declare {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64>, <4 x i64>)
> +declare {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64>, <8 x i64>)
> +
> +declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
> +declare {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32>, <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
> +
> +declare {i16, i1} @llvm.smul.with.overflow.i16(i16, i16)
> +declare {<8 x i16>, <8 x i1>} @llvm.smul.with.overflow.v8i16(<8 x i16>, <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16>, <32 x i16>)
> +
> +declare {i8, i1} @llvm.smul.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8>, <16 x i8>)
> +declare {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8>, <32 x i8>)
> +declare {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
> +
> +define i32 @smul(i32 %arg) {
> +; CHECK-LABEL: 'smul'
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.smul.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.smul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.smul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.smul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.smul.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.smul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +  %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.smul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.smul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.smul.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.smul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.smul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.smul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.smul.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.smul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.smul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.smul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
> +
> +;
> +; umul.with.overflow
> +;
> +
> +declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64)
> +declare {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64>, <2 x i64>)
> +declare {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64>, <4 x i64>)
> +declare {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64>, <8 x i64>)
> +
> +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
> +declare {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32>, <4 x i32>)
> +declare {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32>, <8 x i32>)
> +declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)
> +
> +declare {i16, i1} @llvm.umul.with.overflow.i16(i16, i16)
> +declare {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16>, <8 x i16>)
> +declare {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16>, <16 x i16>)
> +declare {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16>, <32 x i16>)
> +
> +declare {i8, i1} @llvm.umul.with.overflow.i8(i8, i8)
> +declare {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8>, <16 x i8>)
> +declare {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8>, <32 x i8>)
> +declare {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8>, <64 x i8>)
> +
> +define i32 @umul(i32 %arg) {
> +; CHECK-LABEL: 'umul'
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call { <2 x i64>, <2 x i1> } @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I64 = call { <4 x i64>, <4 x i1> } @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I64 = call { <8 x i64>, <8 x i1> } @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call { <4 x i32>, <4 x i1> } @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I32 = call { <8 x i32>, <8 x i1> } @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I32 = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I16 = call { <8 x i16>, <8 x i1> } @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I16 = call { <16 x i16>, <16 x i1> } @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I16 = call { <32 x i16>, <32 x i1> } @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call { <16 x i8>, <16 x i1> } @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V32I8 = call { <32 x i8>, <32 x i1> } @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 191 for instruction: %V64I8 = call { <64 x i8>, <64 x i1> } @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
> +;
> +  %I64 = call {i64, i1} @llvm.umul.with.overflow.i64(i64 undef, i64 undef)
> +  %V2I64 = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> undef, <2 x i64> undef)
> +  %V4I64 = call {<4 x i64>, <4 x i1>} @llvm.umul.with.overflow.v4i64(<4 x i64> undef, <4 x i64> undef)
> +  %V8I64 = call {<8 x i64>, <8 x i1>} @llvm.umul.with.overflow.v8i64(<8 x i64> undef, <8 x i64> undef)
> +
> +  %I32 = call {i32, i1} @llvm.umul.with.overflow.i32(i32 undef, i32 undef)
> +  %V4I32 = call {<4 x i32>, <4 x i1>} @llvm.umul.with.overflow.v4i32(<4 x i32> undef, <4 x i32> undef)
> +  %V8I32 = call {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32> undef, <8 x i32> undef)
> +  %V16I32 = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> undef, <16 x i32> undef)
> +
> +  %I16 = call {i16, i1} @llvm.umul.with.overflow.i16(i16 undef, i16 undef)
> +  %V8I16 = call {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16> undef, <8 x i16> undef)
> +  %V16I16 = call {<16 x i16>, <16 x i1>} @llvm.umul.with.overflow.v16i16(<16 x i16> undef, <16 x i16> undef)
> +  %V32I16 = call {<32 x i16>, <32 x i1>} @llvm.umul.with.overflow.v32i16(<32 x i16> undef, <32 x i16> undef)
> +
> +  %I8 = call {i8, i1} @llvm.umul.with.overflow.i8(i8 undef, i8 undef)
> +  %V16I8 = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> undef, <16 x i8> undef)
> +  %V32I8 = call {<32 x i8>, <32 x i1>} @llvm.umul.with.overflow.v32i8(<32 x i8> undef, <32 x i8> undef)
> +  %V64I8 = call {<64 x i8>, <64 x i1>} @llvm.umul.with.overflow.v64i8(<64 x i8> undef, <64 x i8> undef)
> +
> +  ret i32 undef
> +}
>
> Modified: llvm/trunk/utils/TableGen/CodeGenTarget.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/CodeGenTarget.cpp?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/CodeGenTarget.cpp (original)
> +++ llvm/trunk/utils/TableGen/CodeGenTarget.cpp Wed Jan 23 08:00:22 2019
> @@ -633,7 +633,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Recor
>      // overloaded, all the types can be specified directly.
>      assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
>               !TyEl->isSubClassOf("LLVMTruncatedType") &&
> -             !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
> +             !TyEl->isSubClassOf("LLVMScalarOrSameVectorWidth")) ||
>              VT == MVT::iAny || VT == MVT::vAny) &&
>             "Expected iAny or vAny type");
>    } else
>
> Modified: llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp?rev=351957&r1=351956&r2=351957&view=diff
>
> ==============================================================================
> --- llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp (original)
> +++ llvm/trunk/utils/TableGen/IntrinsicEmitter.cpp Wed Jan 23 08:00:22 2019
> @@ -269,7 +269,7 @@ static void EncodeFixedType(Record *R, s
> Sig.push_back(IIT_TRUNC_ARG);
> else if (R->isSubClassOf("LLVMHalfElementsVectorType"))
> Sig.push_back(IIT_HALF_VEC_ARG);
> - else if (R->isSubClassOf("LLVMVectorSameWidth")) {
> + else if (R->isSubClassOf("LLVMScalarOrSameVectorWidth")) {
> Sig.push_back(IIT_SAME_VEC_WIDTH_ARG);
> Sig.push_back((Number << 3) | ArgCodes[Number]);
> MVT::SimpleValueType VT = getValueType(R->getValueAsDef("ElTy"));
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>