[llvm] r328451 - [X86] Update cost model for Goldmont. Add fsqrt costs for Silvermont
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 25 08:58:13 PDT 2018
Author: ctopper
Date: Sun Mar 25 08:58:12 2018
New Revision: 328451
URL: http://llvm.org/viewvc/llvm-project?rev=328451&view=rev
Log:
[X86] Update cost model for Goldmont. Add fsqrt costs for Silvermont
Add fdiv costs for Goldmont using table 16-17 of the Intel Optimization Manual. Also add overrides for FSQRT for Goldmont and Silvermont.
Reviewers: RKSimon
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D44644
Modified:
llvm/trunk/lib/Target/X86/X86Subtarget.h
llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/X86/arith-fp.ll
llvm/trunk/test/Analysis/CostModel/X86/arith.ll
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=328451&r1=328450&r2=328451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sun Mar 25 08:58:12 2018
@@ -655,6 +655,7 @@ public:
/// TODO: to be removed later and replaced with suitable properties
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
+ bool isGLM() const { return X86ProcFamily == IntelGLM; }
bool useSoftFloat() const { return UseSoftFloat; }
/// Use mfence if we have SSE2 or we're on x86-64 (even if we asked for
Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=328451&r1=328450&r2=328451&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Sun Mar 25 08:58:12 2018
@@ -181,28 +181,40 @@ int X86TTIImpl::getArithmeticInstrCost(
int ISD = TLI->InstructionOpcodeToISD(Opcode);
assert(ISD && "Invalid opcode");
+ static const CostTblEntry GLMCostTable[] = {
+ { ISD::FDIV, MVT::f32, 18 }, // divss
+ { ISD::FDIV, MVT::v4f32, 35 }, // divps
+ { ISD::FDIV, MVT::f64, 33 }, // divsd
+ { ISD::FDIV, MVT::v2f64, 65 }, // divpd
+ };
+
+ if (ST->isGLM())
+ if (const auto *Entry = CostTableLookup(GLMCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+
static const CostTblEntry SLMCostTable[] = {
- { ISD::MUL, MVT::v4i32, 11 }, // pmulld
- { ISD::MUL, MVT::v8i16, 2 }, // pmullw
- { ISD::MUL, MVT::v16i8, 14 }, // extend/pmullw/trunc sequence.
- { ISD::FMUL, MVT::f64, 2 }, // mulsd
- { ISD::FMUL, MVT::v2f64, 4 }, // mulpd
- { ISD::FMUL, MVT::v4f32, 2 }, // mulps
- { ISD::FDIV, MVT::f32, 17 }, // divss
- { ISD::FDIV, MVT::v4f32, 39 }, // divps
- { ISD::FDIV, MVT::f64, 32 }, // divsd
- { ISD::FDIV, MVT::v2f64, 69 }, // divpd
- { ISD::FADD, MVT::v2f64, 2 }, // addpd
- { ISD::FSUB, MVT::v2f64, 2 }, // subpd
+ { ISD::MUL, MVT::v4i32, 11 }, // pmulld
+ { ISD::MUL, MVT::v8i16, 2 }, // pmullw
+ { ISD::MUL, MVT::v16i8, 14 }, // extend/pmullw/trunc sequence.
+ { ISD::FMUL, MVT::f64, 2 }, // mulsd
+ { ISD::FMUL, MVT::v2f64, 4 }, // mulpd
+ { ISD::FMUL, MVT::v4f32, 2 }, // mulps
+ { ISD::FDIV, MVT::f32, 17 }, // divss
+ { ISD::FDIV, MVT::v4f32, 39 }, // divps
+ { ISD::FDIV, MVT::f64, 32 }, // divsd
+ { ISD::FDIV, MVT::v2f64, 69 }, // divpd
+ { ISD::FADD, MVT::v2f64, 2 }, // addpd
+ { ISD::FSUB, MVT::v2f64, 2 }, // subpd
// v2i64/v4i64 mul is custom lowered as a series of long:
// multiplies(3), shifts(3) and adds(2)
// slm muldq version throughput is 2 and addq throughput 4
// thus: 3X2 (muldq throughput) + 3X1 (shift throughput) +
// 2X4 (addq throughput) = 17
- { ISD::MUL, MVT::v2i64, 17 },
+ { ISD::MUL, MVT::v2i64, 17 },
// slm addq\subq throughput is 4
- { ISD::ADD, MVT::v2i64, 4 },
- { ISD::SUB, MVT::v2i64, 4 },
+ { ISD::ADD, MVT::v2i64, 4 },
+ { ISD::SUB, MVT::v2i64, 4 },
};
if (ST->isSLM()) {
@@ -225,6 +237,7 @@ int X86TTIImpl::getArithmeticInstrCost(
if (!signedMode && OpMinSize <= 16)
return LT.first * 5; // pmullw/pmulhw/pshuf
}
+
if (const auto *Entry = CostTableLookup(SLMCostTable, ISD,
LT.second)) {
return LT.first * Entry->Cost;
@@ -1665,6 +1678,18 @@ int X86TTIImpl::getIntrinsicInstrCost(In
{ ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
};
+ static const CostTblEntry GLMCostTbl[] = {
+ { ISD::FSQRT, MVT::f32, 19 }, // sqrtss
+ { ISD::FSQRT, MVT::v4f32, 37 }, // sqrtps
+ { ISD::FSQRT, MVT::f64, 34 }, // sqrtsd
+ { ISD::FSQRT, MVT::v2f64, 67 }, // sqrtpd
+ };
+ static const CostTblEntry SLMCostTbl[] = {
+ { ISD::FSQRT, MVT::f32, 20 }, // sqrtss
+ { ISD::FSQRT, MVT::v4f32, 40 }, // sqrtps
+ { ISD::FSQRT, MVT::f64, 35 }, // sqrtsd
+ { ISD::FSQRT, MVT::v2f64, 70 }, // sqrtpd
+ };
static const CostTblEntry SSE42CostTbl[] = {
{ ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
{ ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
@@ -1755,6 +1780,14 @@ int X86TTIImpl::getIntrinsicInstrCost(In
MVT MTy = LT.second;
// Attempt to lookup cost.
+ if (ST->isGLM())
+ if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
+ if (ST->isSLM())
+ if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasCDI())
if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy))
return LT.first * Entry->Cost;
Modified: llvm/trunk/test/Analysis/CostModel/X86/arith-fp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/arith-fp.ll?rev=328451&r1=328450&r2=328451&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/arith-fp.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/arith-fp.ll Sun Mar 25 08:58:12 2018
@@ -5,6 +5,7 @@
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
+; RUN: opt < %s -enable-no-nans-fp-math -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GLM
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -17,6 +18,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F32 = fadd
; AVX512: cost of 1 {{.*}} %F32 = fadd
; SLM: cost of 1 {{.*}} %F32 = fadd
+ ; GLM: cost of 1 {{.*}} %F32 = fadd
%F32 = fadd float undef, undef
; SSE2: cost of 2 {{.*}} %V4F32 = fadd
; SSE42: cost of 1 {{.*}} %V4F32 = fadd
@@ -24,6 +26,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F32 = fadd
; AVX512: cost of 1 {{.*}} %V4F32 = fadd
; SLM: cost of 1 {{.*}} %V4F32 = fadd
+ ; GLM: cost of 1 {{.*}} %V4F32 = fadd
%V4F32 = fadd <4 x float> undef, undef
; SSE2: cost of 4 {{.*}} %V8F32 = fadd
; SSE42: cost of 2 {{.*}} %V8F32 = fadd
@@ -31,6 +34,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V8F32 = fadd
; AVX512: cost of 1 {{.*}} %V8F32 = fadd
; SLM: cost of 2 {{.*}} %V8F32 = fadd
+ ; GLM: cost of 2 {{.*}} %V8F32 = fadd
%V8F32 = fadd <8 x float> undef, undef
; SSE2: cost of 8 {{.*}} %V16F32 = fadd
; SSE42: cost of 4 {{.*}} %V16F32 = fadd
@@ -38,6 +42,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V16F32 = fadd
; AVX512: cost of 1 {{.*}} %V16F32 = fadd
; SLM: cost of 4 {{.*}} %V16F32 = fadd
+ ; GLM: cost of 4 {{.*}} %V16F32 = fadd
%V16F32 = fadd <16 x float> undef, undef
; SSE2: cost of 2 {{.*}} %F64 = fadd
@@ -46,6 +51,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F64 = fadd
; AVX512: cost of 1 {{.*}} %F64 = fadd
; SLM: cost of 1 {{.*}} %F64 = fadd
+ ; GLM: cost of 1 {{.*}} %F64 = fadd
%F64 = fadd double undef, undef
; SSE2: cost of 2 {{.*}} %V2F64 = fadd
; SSE42: cost of 1 {{.*}} %V2F64 = fadd
@@ -53,6 +59,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V2F64 = fadd
; AVX512: cost of 1 {{.*}} %V2F64 = fadd
; SLM: cost of 2 {{.*}} %V2F64 = fadd
+ ; GLM: cost of 1 {{.*}} %V2F64 = fadd
%V2F64 = fadd <2 x double> undef, undef
; SSE2: cost of 4 {{.*}} %V4F64 = fadd
; SSE42: cost of 2 {{.*}} %V4F64 = fadd
@@ -60,6 +67,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F64 = fadd
; AVX512: cost of 1 {{.*}} %V4F64 = fadd
; SLM: cost of 4 {{.*}} %V4F64 = fadd
+ ; GLM: cost of 2 {{.*}} %V4F64 = fadd
%V4F64 = fadd <4 x double> undef, undef
; SSE2: cost of 8 {{.*}} %V8F64 = fadd
; SSE42: cost of 4 {{.*}} %V8F64 = fadd
@@ -67,6 +75,7 @@ define i32 @fadd(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V8F64 = fadd
; AVX512: cost of 1 {{.*}} %V8F64 = fadd
; SLM: cost of 8 {{.*}} %V8F64 = fadd
+ ; GLM: cost of 4 {{.*}} %V8F64 = fadd
%V8F64 = fadd <8 x double> undef, undef
ret i32 undef
@@ -80,6 +89,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F32 = fsub
; AVX512: cost of 1 {{.*}} %F32 = fsub
; SLM: cost of 1 {{.*}} %F32 = fsub
+ ; GLM: cost of 1 {{.*}} %F32 = fsub
%F32 = fsub float undef, undef
; SSE2: cost of 2 {{.*}} %V4F32 = fsub
; SSE42: cost of 1 {{.*}} %V4F32 = fsub
@@ -87,6 +97,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F32 = fsub
; AVX512: cost of 1 {{.*}} %V4F32 = fsub
; SLM: cost of 1 {{.*}} %V4F32 = fsub
+ ; GLM: cost of 1 {{.*}} %V4F32 = fsub
%V4F32 = fsub <4 x float> undef, undef
; SSE2: cost of 4 {{.*}} %V8F32 = fsub
; SSE42: cost of 2 {{.*}} %V8F32 = fsub
@@ -94,6 +105,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V8F32 = fsub
; AVX512: cost of 1 {{.*}} %V8F32 = fsub
; SLM: cost of 2 {{.*}} %V8F32 = fsub
+ ; GLM: cost of 2 {{.*}} %V8F32 = fsub
%V8F32 = fsub <8 x float> undef, undef
; SSE2: cost of 8 {{.*}} %V16F32 = fsub
; SSE42: cost of 4 {{.*}} %V16F32 = fsub
@@ -101,6 +113,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V16F32 = fsub
; AVX512: cost of 1 {{.*}} %V16F32 = fsub
; SLM: cost of 4 {{.*}} %V16F32 = fsub
+ ; GLM: cost of 4 {{.*}} %V16F32 = fsub
%V16F32 = fsub <16 x float> undef, undef
; SSE2: cost of 2 {{.*}} %F64 = fsub
@@ -109,6 +122,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F64 = fsub
; AVX512: cost of 1 {{.*}} %F64 = fsub
; SLM: cost of 1 {{.*}} %F64 = fsub
+ ; GLM: cost of 1 {{.*}} %F64 = fsub
%F64 = fsub double undef, undef
; SSE2: cost of 2 {{.*}} %V2F64 = fsub
; SSE42: cost of 1 {{.*}} %V2F64 = fsub
@@ -116,6 +130,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V2F64 = fsub
; AVX512: cost of 1 {{.*}} %V2F64 = fsub
; SLM: cost of 2 {{.*}} %V2F64 = fsub
+ ; GLM: cost of 1 {{.*}} %V2F64 = fsub
%V2F64 = fsub <2 x double> undef, undef
; SSE2: cost of 4 {{.*}} %V4F64 = fsub
; SSE42: cost of 2 {{.*}} %V4F64 = fsub
@@ -123,6 +138,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F64 = fsub
; AVX512: cost of 1 {{.*}} %V4F64 = fsub
; SLM: cost of 4 {{.*}} %V4F64 = fsub
+ ; GLM: cost of 2 {{.*}} %V4F64 = fsub
%V4F64 = fsub <4 x double> undef, undef
; SSE2: cost of 8 {{.*}} %V8F64 = fsub
; SSE42: cost of 4 {{.*}} %V8F64 = fsub
@@ -130,6 +146,7 @@ define i32 @fsub(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V8F64 = fsub
; AVX512: cost of 1 {{.*}} %V8F64 = fsub
; SLM: cost of 8 {{.*}} %V8F64 = fsub
+ ; GLM: cost of 4 {{.*}} %V8F64 = fsub
%V8F64 = fsub <8 x double> undef, undef
ret i32 undef
@@ -143,6 +160,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F32 = fmul
; AVX512: cost of 1 {{.*}} %F32 = fmul
; SLM: cost of 1 {{.*}} %F32 = fmul
+ ; GLM: cost of 1 {{.*}} %F32 = fmul
%F32 = fmul float undef, undef
; SSE2: cost of 2 {{.*}} %V4F32 = fmul
; SSE42: cost of 1 {{.*}} %V4F32 = fmul
@@ -150,6 +168,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F32 = fmul
; AVX512: cost of 1 {{.*}} %V4F32 = fmul
; SLM: cost of 2 {{.*}} %V4F32 = fmul
+ ; GLM: cost of 1 {{.*}} %V4F32 = fmul
%V4F32 = fmul <4 x float> undef, undef
; SSE2: cost of 4 {{.*}} %V8F32 = fmul
; SSE42: cost of 2 {{.*}} %V8F32 = fmul
@@ -157,6 +176,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V8F32 = fmul
; AVX512: cost of 1 {{.*}} %V8F32 = fmul
; SLM: cost of 4 {{.*}} %V8F32 = fmul
+ ; GLM: cost of 2 {{.*}} %V8F32 = fmul
%V8F32 = fmul <8 x float> undef, undef
; SSE2: cost of 8 {{.*}} %V16F32 = fmul
; SSE42: cost of 4 {{.*}} %V16F32 = fmul
@@ -164,6 +184,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V16F32 = fmul
; AVX512: cost of 1 {{.*}} %V16F32 = fmul
; SLM: cost of 8 {{.*}} %V16F32 = fmul
+ ; GLM: cost of 4 {{.*}} %V16F32 = fmul
%V16F32 = fmul <16 x float> undef, undef
; SSE2: cost of 2 {{.*}} %F64 = fmul
@@ -172,6 +193,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F64 = fmul
; AVX512: cost of 1 {{.*}} %F64 = fmul
; SLM: cost of 2 {{.*}} %F64 = fmul
+ ; GLM: cost of 1 {{.*}} %F64 = fmul
%F64 = fmul double undef, undef
; SSE2: cost of 2 {{.*}} %V2F64 = fmul
; SSE42: cost of 1 {{.*}} %V2F64 = fmul
@@ -179,6 +201,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V2F64 = fmul
; AVX512: cost of 1 {{.*}} %V2F64 = fmul
; SLM: cost of 4 {{.*}} %V2F64 = fmul
+ ; GLM: cost of 1 {{.*}} %V2F64 = fmul
%V2F64 = fmul <2 x double> undef, undef
; SSE2: cost of 4 {{.*}} %V4F64 = fmul
; SSE42: cost of 2 {{.*}} %V4F64 = fmul
@@ -186,6 +209,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F64 = fmul
; AVX512: cost of 1 {{.*}} %V4F64 = fmul
; SLM: cost of 8 {{.*}} %V4F64 = fmul
+ ; GLM: cost of 2 {{.*}} %V4F64 = fmul
%V4F64 = fmul <4 x double> undef, undef
; SSE2: cost of 8 {{.*}} %V8F64 = fmul
; SSE42: cost of 4 {{.*}} %V8F64 = fmul
@@ -193,6 +217,7 @@ define i32 @fmul(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V8F64 = fmul
; AVX512: cost of 1 {{.*}} %V8F64 = fmul
; SLM: cost of 16 {{.*}} %V8F64 = fmul
+ ; GLM: cost of 4 {{.*}} %V8F64 = fmul
%V8F64 = fmul <8 x double> undef, undef
ret i32 undef
@@ -206,6 +231,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 7 {{.*}} %F32 = fdiv
; AVX512: cost of 7 {{.*}} %F32 = fdiv
; SLM: cost of 17 {{.*}} %F32 = fdiv
+ ; GLM: cost of 18 {{.*}} %F32 = fdiv
%F32 = fdiv float undef, undef
; SSE2: cost of 39 {{.*}} %V4F32 = fdiv
; SSE42: cost of 14 {{.*}} %V4F32 = fdiv
@@ -213,6 +239,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 7 {{.*}} %V4F32 = fdiv
; AVX512: cost of 7 {{.*}} %V4F32 = fdiv
; SLM: cost of 39 {{.*}} %V4F32 = fdiv
+ ; GLM: cost of 35 {{.*}} %V4F32 = fdiv
%V4F32 = fdiv <4 x float> undef, undef
; SSE2: cost of 78 {{.*}} %V8F32 = fdiv
; SSE42: cost of 28 {{.*}} %V8F32 = fdiv
@@ -220,6 +247,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 14 {{.*}} %V8F32 = fdiv
; AVX512: cost of 14 {{.*}} %V8F32 = fdiv
; SLM: cost of 78 {{.*}} %V8F32 = fdiv
+ ; GLM: cost of 70 {{.*}} %V8F32 = fdiv
%V8F32 = fdiv <8 x float> undef, undef
; SSE2: cost of 156 {{.*}} %V16F32 = fdiv
; SSE42: cost of 56 {{.*}} %V16F32 = fdiv
@@ -227,6 +255,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 28 {{.*}} %V16F32 = fdiv
; AVX512: cost of 2 {{.*}} %V16F32 = fdiv
; SLM: cost of 156 {{.*}} %V16F32 = fdiv
+ ; GLM: cost of 140 {{.*}} %V16F32 = fdiv
%V16F32 = fdiv <16 x float> undef, undef
; SSE2: cost of 38 {{.*}} %F64 = fdiv
@@ -235,6 +264,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 14 {{.*}} %F64 = fdiv
; AVX512: cost of 14 {{.*}} %F64 = fdiv
; SLM: cost of 32 {{.*}} %F64 = fdiv
+ ; GLM: cost of 33 {{.*}} %F64 = fdiv
%F64 = fdiv double undef, undef
; SSE2: cost of 69 {{.*}} %V2F64 = fdiv
; SSE42: cost of 22 {{.*}} %V2F64 = fdiv
@@ -242,6 +272,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 14 {{.*}} %V2F64 = fdiv
; AVX512: cost of 14 {{.*}} %V2F64 = fdiv
; SLM: cost of 69 {{.*}} %V2F64 = fdiv
+ ; GLM: cost of 65 {{.*}} %V2F64 = fdiv
%V2F64 = fdiv <2 x double> undef, undef
; SSE2: cost of 138 {{.*}} %V4F64 = fdiv
; SSE42: cost of 44 {{.*}} %V4F64 = fdiv
@@ -249,6 +280,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 28 {{.*}} %V4F64 = fdiv
; AVX512: cost of 28 {{.*}} %V4F64 = fdiv
; SLM: cost of 138 {{.*}} %V4F64 = fdiv
+ ; GLM: cost of 130 {{.*}} %V4F64 = fdiv
%V4F64 = fdiv <4 x double> undef, undef
; SSE2: cost of 276 {{.*}} %V8F64 = fdiv
; SSE42: cost of 88 {{.*}} %V8F64 = fdiv
@@ -256,6 +288,7 @@ define i32 @fdiv(i32 %arg) {
; AVX2: cost of 56 {{.*}} %V8F64 = fdiv
; AVX512: cost of 2 {{.*}} %V8F64 = fdiv
; SLM: cost of 276 {{.*}} %V8F64 = fdiv
+ ; GLM: cost of 260 {{.*}} %V8F64 = fdiv
%V8F64 = fdiv <8 x double> undef, undef
ret i32 undef
@@ -269,6 +302,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 2 {{.*}} %F32 = frem
; AVX512: cost of 2 {{.*}} %F32 = frem
; SLM: cost of 2 {{.*}} %F32 = frem
+ ; GLM: cost of 2 {{.*}} %F32 = frem
%F32 = frem float undef, undef
; SSE2: cost of 14 {{.*}} %V4F32 = frem
; SSE42: cost of 14 {{.*}} %V4F32 = frem
@@ -276,6 +310,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 14 {{.*}} %V4F32 = frem
; AVX512: cost of 14 {{.*}} %V4F32 = frem
; SLM: cost of 14 {{.*}} %V4F32 = frem
+ ; GLM: cost of 14 {{.*}} %V4F32 = frem
%V4F32 = frem <4 x float> undef, undef
; SSE2: cost of 28 {{.*}} %V8F32 = frem
; SSE42: cost of 28 {{.*}} %V8F32 = frem
@@ -283,6 +318,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 30 {{.*}} %V8F32 = frem
; AVX512: cost of 30 {{.*}} %V8F32 = frem
; SLM: cost of 28 {{.*}} %V8F32 = frem
+ ; GLM: cost of 28 {{.*}} %V8F32 = frem
%V8F32 = frem <8 x float> undef, undef
; SSE2: cost of 56 {{.*}} %V16F32 = frem
; SSE42: cost of 56 {{.*}} %V16F32 = frem
@@ -290,6 +326,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 60 {{.*}} %V16F32 = frem
; AVX512: cost of 62 {{.*}} %V16F32 = frem
; SLM: cost of 56 {{.*}} %V16F32 = frem
+ ; GLM: cost of 56 {{.*}} %V16F32 = frem
%V16F32 = frem <16 x float> undef, undef
; SSE2: cost of 2 {{.*}} %F64 = frem
@@ -298,6 +335,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 2 {{.*}} %F64 = frem
; AVX512: cost of 2 {{.*}} %F64 = frem
; SLM: cost of 2 {{.*}} %F64 = frem
+ ; GLM: cost of 2 {{.*}} %F64 = frem
%F64 = frem double undef, undef
; SSE2: cost of 6 {{.*}} %V2F64 = frem
; SSE42: cost of 6 {{.*}} %V2F64 = frem
@@ -305,6 +343,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 6 {{.*}} %V2F64 = frem
; AVX512: cost of 6 {{.*}} %V2F64 = frem
; SLM: cost of 6 {{.*}} %V2F64 = frem
+ ; GLM: cost of 6 {{.*}} %V2F64 = frem
%V2F64 = frem <2 x double> undef, undef
; SSE2: cost of 12 {{.*}} %V4F64 = frem
; SSE42: cost of 12 {{.*}} %V4F64 = frem
@@ -312,6 +351,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 14 {{.*}} %V4F64 = frem
; AVX512: cost of 14 {{.*}} %V4F64 = frem
; SLM: cost of 12 {{.*}} %V4F64 = frem
+ ; GLM: cost of 12 {{.*}} %V4F64 = frem
%V4F64 = frem <4 x double> undef, undef
; SSE2: cost of 24 {{.*}} %V8F64 = frem
; SSE42: cost of 24 {{.*}} %V8F64 = frem
@@ -319,6 +359,7 @@ define i32 @frem(i32 %arg) {
; AVX2: cost of 28 {{.*}} %V8F64 = frem
; AVX512: cost of 30 {{.*}} %V8F64 = frem
; SLM: cost of 24 {{.*}} %V8F64 = frem
+ ; GLM: cost of 24 {{.*}} %V8F64 = frem
%V8F64 = frem <8 x double> undef, undef
ret i32 undef
@@ -331,28 +372,32 @@ define i32 @fsqrt(i32 %arg) {
; AVX: cost of 14 {{.*}} %F32 = call float @llvm.sqrt.f32
; AVX2: cost of 7 {{.*}} %F32 = call float @llvm.sqrt.f32
; AVX512: cost of 7 {{.*}} %F32 = call float @llvm.sqrt.f32
- ; SLM: cost of 18 {{.*}} %F32 = call float @llvm.sqrt.f32
+ ; SLM: cost of 20 {{.*}} %F32 = call float @llvm.sqrt.f32
+ ; GLM: cost of 19 {{.*}} %F32 = call float @llvm.sqrt.f32
%F32 = call float @llvm.sqrt.f32(float undef)
; SSE2: cost of 56 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
; SSE42: cost of 18 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
; AVX: cost of 14 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
; AVX2: cost of 7 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
; AVX512: cost of 7 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
- ; SLM: cost of 18 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ ; SLM: cost of 40 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
+ ; GLM: cost of 37 {{.*}} %V4F32 = call <4 x float> @llvm.sqrt.v4f32
%V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef)
; SSE2: cost of 112 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
; SSE42: cost of 36 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
; AVX: cost of 28 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
; AVX2: cost of 14 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
; AVX512: cost of 14 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
- ; SLM: cost of 36 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ ; SLM: cost of 80 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
+ ; GLM: cost of 74 {{.*}} %V8F32 = call <8 x float> @llvm.sqrt.v8f32
%V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef)
; SSE2: cost of 224 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
; SSE42: cost of 72 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
; AVX: cost of 56 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
; AVX2: cost of 28 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
- ; SLM: cost of 72 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ ; SLM: cost of 160 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
+ ; GLM: cost of 148 {{.*}} %V16F32 = call <16 x float> @llvm.sqrt.v16f32
%V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef)
; SSE2: cost of 32 {{.*}} %F64 = call double @llvm.sqrt.f64
@@ -360,28 +405,32 @@ define i32 @fsqrt(i32 %arg) {
; AVX: cost of 21 {{.*}} %F64 = call double @llvm.sqrt.f64
; AVX2: cost of 14 {{.*}} %F64 = call double @llvm.sqrt.f64
; AVX512: cost of 14 {{.*}} %F64 = call double @llvm.sqrt.f64
- ; SLM: cost of 32 {{.*}} %F64 = call double @llvm.sqrt.f64
+ ; SLM: cost of 35 {{.*}} %F64 = call double @llvm.sqrt.f64
+ ; GLM: cost of 34 {{.*}} %F64 = call double @llvm.sqrt.f64
%F64 = call double @llvm.sqrt.f64(double undef)
; SSE2: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
; SSE42: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
; AVX: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
; AVX2: cost of 14 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
; AVX512: cost of 14 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
- ; SLM: cost of 32 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ ; SLM: cost of 70 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
+ ; GLM: cost of 67 {{.*}} %V2F64 = call <2 x double> @llvm.sqrt.v2f64
%V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef)
; SSE2: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
; SSE42: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
; AVX: cost of 43 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
; AVX2: cost of 28 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
; AVX512: cost of 28 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
- ; SLM: cost of 64 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ ; SLM: cost of 140 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
+ ; GLM: cost of 134 {{.*}} %V4F64 = call <4 x double> @llvm.sqrt.v4f64
%V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef)
; SSE2: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
; SSE42: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
; AVX: cost of 86 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
; AVX2: cost of 56 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
- ; SLM: cost of 128 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ ; SLM: cost of 280 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
+ ; GLM: cost of 268 {{.*}} %V8F64 = call <8 x double> @llvm.sqrt.v8f64
%V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef)
ret i32 undef
@@ -395,6 +444,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
; SLM: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
+ ; GLM: cost of 2 {{.*}} %F32 = call float @llvm.fabs.f32
%F32 = call float @llvm.fabs.f32(float undef)
; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
@@ -402,6 +452,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
; SLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
+ ; GLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.fabs.v4f32
%V4F32 = call <4 x float> @llvm.fabs.v4f32(<4 x float> undef)
; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
@@ -409,6 +460,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
; SLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
+ ; GLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.fabs.v8f32
%V8F32 = call <8 x float> @llvm.fabs.v8f32(<8 x float> undef)
; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
@@ -416,6 +468,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
; SLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
+ ; GLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.fabs.v16f32
%V16F32 = call <16 x float> @llvm.fabs.v16f32(<16 x float> undef)
; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
@@ -424,6 +477,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
; SLM: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
+ ; GLM: cost of 2 {{.*}} %F64 = call double @llvm.fabs.f64
%F64 = call double @llvm.fabs.f64(double undef)
; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
@@ -431,6 +485,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
; SLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
+ ; GLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.fabs.v2f64
%V2F64 = call <2 x double> @llvm.fabs.v2f64(<2 x double> undef)
; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
@@ -438,6 +493,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
; SLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
+ ; GLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.fabs.v4f64
%V4F64 = call <4 x double> @llvm.fabs.v4f64(<4 x double> undef)
; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
@@ -445,6 +501,7 @@ define i32 @fabs(i32 %arg) {
; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
; SLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
+ ; GLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.fabs.v8f64
%V8F64 = call <8 x double> @llvm.fabs.v8f64(<8 x double> undef)
ret i32 undef
@@ -458,6 +515,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
; AVX512: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
; SLM: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
+ ; GLM: cost of 2 {{.*}} %F32 = call float @llvm.copysign.f32
%F32 = call float @llvm.copysign.f32(float undef, float undef)
; SSE2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
; SSE42: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
@@ -465,6 +523,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
; AVX512: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
; SLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
+ ; GLM: cost of 2 {{.*}} %V4F32 = call <4 x float> @llvm.copysign.v4f32
%V4F32 = call <4 x float> @llvm.copysign.v4f32(<4 x float> undef, <4 x float> undef)
; SSE2: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
; SSE42: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
@@ -472,6 +531,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
; AVX512: cost of 2 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
; SLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
+ ; GLM: cost of 4 {{.*}} %V8F32 = call <8 x float> @llvm.copysign.v8f32
%V8F32 = call <8 x float> @llvm.copysign.v8f32(<8 x float> undef, <8 x float> undef)
; SSE2: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
; SSE42: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
@@ -479,6 +539,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
; AVX512: cost of 2 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
; SLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
+ ; GLM: cost of 8 {{.*}} %V16F32 = call <16 x float> @llvm.copysign.v16f32
%V16F32 = call <16 x float> @llvm.copysign.v16f32(<16 x float> undef, <16 x float> undef)
; SSE2: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
@@ -487,6 +548,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
; AVX512: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
; SLM: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
+ ; GLM: cost of 2 {{.*}} %F64 = call double @llvm.copysign.f64
%F64 = call double @llvm.copysign.f64(double undef, double undef)
; SSE2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
; SSE42: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
@@ -494,6 +556,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
; AVX512: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
; SLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
+ ; GLM: cost of 2 {{.*}} %V2F64 = call <2 x double> @llvm.copysign.v2f64
%V2F64 = call <2 x double> @llvm.copysign.v2f64(<2 x double> undef, <2 x double> undef)
; SSE2: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
; SSE42: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
@@ -501,6 +564,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
; AVX512: cost of 2 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
; SLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
+ ; GLM: cost of 4 {{.*}} %V4F64 = call <4 x double> @llvm.copysign.v4f64
%V4F64 = call <4 x double> @llvm.copysign.v4f64(<4 x double> undef, <4 x double> undef)
; SSE2: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
; SSE42: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
@@ -508,6 +572,7 @@ define i32 @fcopysign(i32 %arg) {
; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
; AVX512: cost of 2 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
; SLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
+ ; GLM: cost of 8 {{.*}} %V8F64 = call <8 x double> @llvm.copysign.v8f64
%V8F64 = call <8 x double> @llvm.copysign.v8f64(<8 x double> undef, <8 x double> undef)
ret i32 undef
@@ -521,6 +586,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
; AVX512: cost of 1 {{.*}} %F32 = call float @llvm.fma.f32
; SLM: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
+ ; GLM: cost of 10 {{.*}} %F32 = call float @llvm.fma.f32
%F32 = call float @llvm.fma.f32(float undef, float undef, float undef)
; SSE2: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
; SSE42: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
@@ -528,6 +594,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
; AVX512: cost of 1 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
; SLM: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
+ ; GLM: cost of 43 {{.*}} %V4F32 = call <4 x float> @llvm.fma.v4f32
%V4F32 = call <4 x float> @llvm.fma.v4f32(<4 x float> undef, <4 x float> undef, <4 x float> undef)
; SSE2: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
; SSE42: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
@@ -535,6 +602,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
; AVX512: cost of 1 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
; SLM: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
+ ; GLM: cost of 86 {{.*}} %V8F32 = call <8 x float> @llvm.fma.v8f32
%V8F32 = call <8 x float> @llvm.fma.v8f32(<8 x float> undef, <8 x float> undef, <8 x float> undef)
; SSE2: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; SSE42: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
@@ -542,6 +610,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 4 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; AVX512: cost of 1 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
; SLM: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
+ ; GLM: cost of 172 {{.*}} %V16F32 = call <16 x float> @llvm.fma.v16f32
%V16F32 = call <16 x float> @llvm.fma.v16f32(<16 x float> undef, <16 x float> undef, <16 x float> undef)
; SSE2: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
@@ -550,6 +619,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
; AVX512: cost of 1 {{.*}} %F64 = call double @llvm.fma.f64
; SLM: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
+ ; GLM: cost of 10 {{.*}} %F64 = call double @llvm.fma.f64
%F64 = call double @llvm.fma.f64(double undef, double undef, double undef)
; SSE2: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
; SSE42: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
@@ -557,6 +627,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
; AVX512: cost of 1 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
; SLM: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
+ ; GLM: cost of 21 {{.*}} %V2F64 = call <2 x double> @llvm.fma.v2f64
%V2F64 = call <2 x double> @llvm.fma.v2f64(<2 x double> undef, <2 x double> undef, <2 x double> undef)
; SSE2: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
; SSE42: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
@@ -564,6 +635,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
; AVX512: cost of 1 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
; SLM: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
+ ; GLM: cost of 42 {{.*}} %V4F64 = call <4 x double> @llvm.fma.v4f64
%V4F64 = call <4 x double> @llvm.fma.v4f64(<4 x double> undef, <4 x double> undef, <4 x double> undef)
; SSE2: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; SSE42: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
@@ -571,6 +643,7 @@ define i32 @fma(i32 %arg) {
; AVX2: cost of 4 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; AVX512: cost of 1 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
; SLM: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
+ ; GLM: cost of 84 {{.*}} %V8F64 = call <8 x double> @llvm.fma.v8f64
%V8F64 = call <8 x double> @llvm.fma.v8f64(<8 x double> undef, <8 x double> undef, <8 x double> undef)
ret i32 undef
Modified: llvm/trunk/test/Analysis/CostModel/X86/arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/arith.ll?rev=328451&r1=328450&r2=328451&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/arith.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/arith.ll Sun Mar 25 08:58:12 2018
@@ -5,6 +5,8 @@
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mattr=+avx512f,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512DQ
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
+; RUN: opt < %s -cost-model -analyze -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefix=CHECK --check-prefix=GLM
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -15,18 +17,24 @@ define i32 @add(i32 %arg) {
%I64 = add i64 undef, undef
; SSSE3: cost of 1 {{.*}} %V2I64 = add
; SSE42: cost of 1 {{.*}} %V2I64 = add
+ ; SLM: cost of 4 {{.*}} %V2I64 = add
+ ; GLM: cost of 1 {{.*}} %V2I64 = add
; AVX: cost of 1 {{.*}} %V2I64 = add
; AVX2: cost of 1 {{.*}} %V2I64 = add
; AVX512: cost of 1 {{.*}} %V2I64 = add
%V2I64 = add <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %V4I64 = add
; SSE42: cost of 2 {{.*}} %V4I64 = add
+ ; SLM: cost of 8 {{.*}} %V4I64 = add
+ ; GLM: cost of 2 {{.*}} %V4I64 = add
; AVX: cost of 4 {{.*}} %V4I64 = add
; AVX2: cost of 1 {{.*}} %V4I64 = add
; AVX512: cost of 1 {{.*}} %V4I64 = add
%V4I64 = add <4 x i64> undef, undef
; SSSE3: cost of 4 {{.*}} %V8I64 = add
; SSE42: cost of 4 {{.*}} %V8I64 = add
+ ; SLM: cost of 16 {{.*}} %V8I64 = add
+ ; GLM: cost of 4 {{.*}} %V8I64 = add
; AVX: cost of 8 {{.*}} %V8I64 = add
; AVX2: cost of 2 {{.*}} %V8I64 = add
; AVX512: cost of 1 {{.*}} %V8I64 = add
@@ -36,18 +44,24 @@ define i32 @add(i32 %arg) {
%I32 = add i32 undef, undef
; SSSE3: cost of 1 {{.*}} %V4I32 = add
; SSE42: cost of 1 {{.*}} %V4I32 = add
+ ; SLM: cost of 1 {{.*}} %V4I32 = add
+ ; GLM: cost of 1 {{.*}} %V4I32 = add
; AVX: cost of 1 {{.*}} %V4I32 = add
; AVX2: cost of 1 {{.*}} %V4I32 = add
; AVX512: cost of 1 {{.*}} %V4I32 = add
%V4I32 = add <4 x i32> undef, undef
; SSSE3: cost of 2 {{.*}} %V8I32 = add
; SSE42: cost of 2 {{.*}} %V8I32 = add
+ ; SLM: cost of 2 {{.*}} %V8I32 = add
+ ; GLM: cost of 2 {{.*}} %V8I32 = add
; AVX: cost of 4 {{.*}} %V8I32 = add
; AVX2: cost of 1 {{.*}} %V8I32 = add
; AVX512: cost of 1 {{.*}} %V8I32 = add
%V8I32 = add <8 x i32> undef, undef
; SSSE3: cost of 4 {{.*}} %V16I32 = add
; SSE42: cost of 4 {{.*}} %V16I32 = add
+ ; SLM: cost of 4 {{.*}} %V16I32 = add
+ ; GLM: cost of 4 {{.*}} %V16I32 = add
; AVX: cost of 8 {{.*}} %V16I32 = add
; AVX2: cost of 2 {{.*}} %V16I32 = add
; AVX512: cost of 1 {{.*}} %V16I32 = add
@@ -57,18 +71,24 @@ define i32 @add(i32 %arg) {
%I16 = add i16 undef, undef
; SSSE3: cost of 1 {{.*}} %V8I16 = add
; SSE42: cost of 1 {{.*}} %V8I16 = add
+ ; SLM: cost of 1 {{.*}} %V8I16 = add
+ ; GLM: cost of 1 {{.*}} %V8I16 = add
; AVX: cost of 1 {{.*}} %V8I16 = add
; AVX2: cost of 1 {{.*}} %V8I16 = add
; AVX512: cost of 1 {{.*}} %V8I16 = add
%V8I16 = add <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %V16I16 = add
; SSE42: cost of 2 {{.*}} %V16I16 = add
+ ; SLM: cost of 2 {{.*}} %V16I16 = add
+ ; GLM: cost of 2 {{.*}} %V16I16 = add
; AVX: cost of 4 {{.*}} %V16I16 = add
; AVX2: cost of 1 {{.*}} %V16I16 = add
; AVX512: cost of 1 {{.*}} %V16I16 = add
%V16I16 = add <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %V32I16 = add
; SSE42: cost of 4 {{.*}} %V32I16 = add
+ ; SLM: cost of 4 {{.*}} %V32I16 = add
+ ; GLM: cost of 4 {{.*}} %V32I16 = add
; AVX: cost of 8 {{.*}} %V32I16 = add
; AVX2: cost of 2 {{.*}} %V32I16 = add
; AVX512F: cost of 2 {{.*}} %V32I16 = add
@@ -79,18 +99,24 @@ define i32 @add(i32 %arg) {
%I8 = add i8 undef, undef
; SSSE3: cost of 1 {{.*}} %V16I8 = add
; SSE42: cost of 1 {{.*}} %V16I8 = add
+ ; SLM: cost of 1 {{.*}} %V16I8 = add
+ ; GLM: cost of 1 {{.*}} %V16I8 = add
; AVX: cost of 1 {{.*}} %V16I8 = add
; AVX2: cost of 1 {{.*}} %V16I8 = add
; AVX512: cost of 1 {{.*}} %V16I8 = add
%V16I8 = add <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %V32I8 = add
; SSE42: cost of 2 {{.*}} %V32I8 = add
+ ; SLM: cost of 2 {{.*}} %V32I8 = add
+ ; GLM: cost of 2 {{.*}} %V32I8 = add
; AVX: cost of 4 {{.*}} %V32I8 = add
; AVX2: cost of 1 {{.*}} %V32I8 = add
; AVX512: cost of 1 {{.*}} %V32I8 = add
%V32I8 = add <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %V64I8 = add
; SSE42: cost of 4 {{.*}} %V64I8 = add
+ ; SLM: cost of 4 {{.*}} %V64I8 = add
+ ; GLM: cost of 4 {{.*}} %V64I8 = add
; AVX: cost of 8 {{.*}} %V64I8 = add
; AVX2: cost of 2 {{.*}} %V64I8 = add
; AVX512F: cost of 2 {{.*}} %V64I8 = add
@@ -106,18 +132,24 @@ define i32 @sub(i32 %arg) {
%I64 = sub i64 undef, undef
; SSSE3: cost of 1 {{.*}} %V2I64 = sub
; SSE42: cost of 1 {{.*}} %V2I64 = sub
+ ; SLM: cost of 4 {{.*}} %V2I64 = sub
+ ; GLM: cost of 1 {{.*}} %V2I64 = sub
; AVX: cost of 1 {{.*}} %V2I64 = sub
; AVX2: cost of 1 {{.*}} %V2I64 = sub
; AVX512: cost of 1 {{.*}} %V2I64 = sub
%V2I64 = sub <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %V4I64 = sub
; SSE42: cost of 2 {{.*}} %V4I64 = sub
+ ; SLM: cost of 8 {{.*}} %V4I64 = sub
+ ; GLM: cost of 2 {{.*}} %V4I64 = sub
; AVX: cost of 4 {{.*}} %V4I64 = sub
; AVX2: cost of 1 {{.*}} %V4I64 = sub
; AVX512: cost of 1 {{.*}} %V4I64 = sub
%V4I64 = sub <4 x i64> undef, undef
; SSSE3: cost of 4 {{.*}} %V8I64 = sub
; SSE42: cost of 4 {{.*}} %V8I64 = sub
+ ; SLM: cost of 16 {{.*}} %V8I64 = sub
+ ; GLM: cost of 4 {{.*}} %V8I64 = sub
; AVX: cost of 8 {{.*}} %V8I64 = sub
; AVX2: cost of 2 {{.*}} %V8I64 = sub
; AVX512: cost of 1 {{.*}} %V8I64 = sub
@@ -127,18 +159,24 @@ define i32 @sub(i32 %arg) {
%I32 = sub i32 undef, undef
; SSSE3: cost of 1 {{.*}} %V4I32 = sub
; SSE42: cost of 1 {{.*}} %V4I32 = sub
+ ; SLM: cost of 1 {{.*}} %V4I32 = sub
+ ; GLM: cost of 1 {{.*}} %V4I32 = sub
; AVX: cost of 1 {{.*}} %V4I32 = sub
; AVX2: cost of 1 {{.*}} %V4I32 = sub
; AVX512: cost of 1 {{.*}} %V4I32 = sub
%V4I32 = sub <4 x i32> undef, undef
; SSSE3: cost of 2 {{.*}} %V8I32 = sub
; SSE42: cost of 2 {{.*}} %V8I32 = sub
+ ; SLM: cost of 2 {{.*}} %V8I32 = sub
+ ; GLM: cost of 2 {{.*}} %V8I32 = sub
; AVX: cost of 4 {{.*}} %V8I32 = sub
; AVX2: cost of 1 {{.*}} %V8I32 = sub
; AVX512: cost of 1 {{.*}} %V8I32 = sub
%V8I32 = sub <8 x i32> undef, undef
; SSSE3: cost of 4 {{.*}} %V16I32 = sub
; SSE42: cost of 4 {{.*}} %V16I32 = sub
+ ; SLM: cost of 4 {{.*}} %V16I32 = sub
+ ; GLM: cost of 4 {{.*}} %V16I32 = sub
; AVX: cost of 8 {{.*}} %V16I32 = sub
; AVX2: cost of 2 {{.*}} %V16I32 = sub
; AVX512: cost of 1 {{.*}} %V16I32 = sub
@@ -148,18 +186,24 @@ define i32 @sub(i32 %arg) {
%I16 = sub i16 undef, undef
; SSSE3: cost of 1 {{.*}} %V8I16 = sub
; SSE42: cost of 1 {{.*}} %V8I16 = sub
+ ; SLM: cost of 1 {{.*}} %V8I16 = sub
+ ; GLM: cost of 1 {{.*}} %V8I16 = sub
; AVX: cost of 1 {{.*}} %V8I16 = sub
; AVX2: cost of 1 {{.*}} %V8I16 = sub
; AVX512: cost of 1 {{.*}} %V8I16 = sub
%V8I16 = sub <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %V16I16 = sub
; SSE42: cost of 2 {{.*}} %V16I16 = sub
+ ; SLM: cost of 2 {{.*}} %V16I16 = sub
+ ; GLM: cost of 2 {{.*}} %V16I16 = sub
; AVX: cost of 4 {{.*}} %V16I16 = sub
; AVX2: cost of 1 {{.*}} %V16I16 = sub
; AVX512: cost of 1 {{.*}} %V16I16 = sub
%V16I16 = sub <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %V32I16 = sub
; SSE42: cost of 4 {{.*}} %V32I16 = sub
+ ; SLM: cost of 4 {{.*}} %V32I16 = sub
+ ; GLM: cost of 4 {{.*}} %V32I16 = sub
; AVX: cost of 8 {{.*}} %V32I16 = sub
; AVX2: cost of 2 {{.*}} %V32I16 = sub
; AVX512F: cost of 2 {{.*}} %V32I16 = sub
@@ -170,18 +214,24 @@ define i32 @sub(i32 %arg) {
%I8 = sub i8 undef, undef
; SSSE3: cost of 1 {{.*}} %V16I8 = sub
; SSE42: cost of 1 {{.*}} %V16I8 = sub
+ ; SLM: cost of 1 {{.*}} %V16I8 = sub
+ ; GLM: cost of 1 {{.*}} %V16I8 = sub
; AVX: cost of 1 {{.*}} %V16I8 = sub
; AVX2: cost of 1 {{.*}} %V16I8 = sub
; AVX512: cost of 1 {{.*}} %V16I8 = sub
%V16I8 = sub <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %V32I8 = sub
; SSE42: cost of 2 {{.*}} %V32I8 = sub
+ ; SLM: cost of 2 {{.*}} %V32I8 = sub
+ ; GLM: cost of 2 {{.*}} %V32I8 = sub
; AVX: cost of 4 {{.*}} %V32I8 = sub
; AVX2: cost of 1 {{.*}} %V32I8 = sub
; AVX512: cost of 1 {{.*}} %V32I8 = sub
%V32I8 = sub <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %V64I8 = sub
; SSE42: cost of 4 {{.*}} %V64I8 = sub
+ ; SLM: cost of 4 {{.*}} %V64I8 = sub
+ ; GLM: cost of 4 {{.*}} %V64I8 = sub
; AVX: cost of 8 {{.*}} %V64I8 = sub
; AVX2: cost of 2 {{.*}} %V64I8 = sub
; AVX512F: cost of 2 {{.*}} %V64I8 = sub
@@ -197,18 +247,24 @@ define i32 @or(i32 %arg) {
%I64 = or i64 undef, undef
; SSSE3: cost of 1 {{.*}} %V2I64 = or
; SSE42: cost of 1 {{.*}} %V2I64 = or
+ ; SLM: cost of 1 {{.*}} %V2I64 = or
+ ; GLM: cost of 1 {{.*}} %V2I64 = or
; AVX: cost of 1 {{.*}} %V2I64 = or
; AVX2: cost of 1 {{.*}} %V2I64 = or
; AVX512: cost of 1 {{.*}} %V2I64 = or
%V2I64 = or <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %V4I64 = or
; SSE42: cost of 2 {{.*}} %V4I64 = or
+ ; SLM: cost of 2 {{.*}} %V4I64 = or
+ ; GLM: cost of 2 {{.*}} %V4I64 = or
; AVX: cost of 1 {{.*}} %V4I64 = or
; AVX2: cost of 1 {{.*}} %V4I64 = or
; AVX512: cost of 1 {{.*}} %V4I64 = or
%V4I64 = or <4 x i64> undef, undef
; SSSE3: cost of 4 {{.*}} %V8I64 = or
; SSE42: cost of 4 {{.*}} %V8I64 = or
+ ; SLM: cost of 4 {{.*}} %V8I64 = or
+ ; GLM: cost of 4 {{.*}} %V8I64 = or
; AVX: cost of 2 {{.*}} %V8I64 = or
; AVX2: cost of 2 {{.*}} %V8I64 = or
; AVX512: cost of 1 {{.*}} %V8I64 = or
@@ -218,18 +274,24 @@ define i32 @or(i32 %arg) {
%I32 = or i32 undef, undef
; SSSE3: cost of 1 {{.*}} %V4I32 = or
; SSE42: cost of 1 {{.*}} %V4I32 = or
+ ; SLM: cost of 1 {{.*}} %V4I32 = or
+ ; GLM: cost of 1 {{.*}} %V4I32 = or
; AVX: cost of 1 {{.*}} %V4I32 = or
; AVX2: cost of 1 {{.*}} %V4I32 = or
; AVX512: cost of 1 {{.*}} %V4I32 = or
%V4I32 = or <4 x i32> undef, undef
; SSSE3: cost of 2 {{.*}} %V8I32 = or
; SSE42: cost of 2 {{.*}} %V8I32 = or
+ ; SLM: cost of 2 {{.*}} %V8I32 = or
+ ; GLM: cost of 2 {{.*}} %V8I32 = or
; AVX: cost of 1 {{.*}} %V8I32 = or
; AVX2: cost of 1 {{.*}} %V8I32 = or
; AVX512: cost of 1 {{.*}} %V8I32 = or
%V8I32 = or <8 x i32> undef, undef
; SSSE3: cost of 4 {{.*}} %V16I32 = or
; SSE42: cost of 4 {{.*}} %V16I32 = or
+ ; SLM: cost of 4 {{.*}} %V16I32 = or
+ ; GLM: cost of 4 {{.*}} %V16I32 = or
; AVX: cost of 2 {{.*}} %V16I32 = or
; AVX2: cost of 2 {{.*}} %V16I32 = or
; AVX512: cost of 1 {{.*}} %V16I32 = or
@@ -239,18 +301,24 @@ define i32 @or(i32 %arg) {
%I16 = or i16 undef, undef
; SSSE3: cost of 1 {{.*}} %V8I16 = or
; SSE42: cost of 1 {{.*}} %V8I16 = or
+ ; SLM: cost of 1 {{.*}} %V8I16 = or
+ ; GLM: cost of 1 {{.*}} %V8I16 = or
; AVX: cost of 1 {{.*}} %V8I16 = or
; AVX2: cost of 1 {{.*}} %V8I16 = or
; AVX512: cost of 1 {{.*}} %V8I16 = or
%V8I16 = or <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %V16I16 = or
; SSE42: cost of 2 {{.*}} %V16I16 = or
+ ; SLM: cost of 2 {{.*}} %V16I16 = or
+ ; GLM: cost of 2 {{.*}} %V16I16 = or
; AVX: cost of 1 {{.*}} %V16I16 = or
; AVX2: cost of 1 {{.*}} %V16I16 = or
; AVX512: cost of 1 {{.*}} %V16I16 = or
%V16I16 = or <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %V32I16 = or
; SSE42: cost of 4 {{.*}} %V32I16 = or
+ ; SLM: cost of 4 {{.*}} %V32I16 = or
+ ; GLM: cost of 4 {{.*}} %V32I16 = or
; AVX: cost of 2 {{.*}} %V32I16 = or
; AVX2: cost of 2 {{.*}} %V32I16 = or
; AVX512F: cost of 2 {{.*}} %V32I16 = or
@@ -261,18 +329,24 @@ define i32 @or(i32 %arg) {
%I8 = or i8 undef, undef
; SSSE3: cost of 1 {{.*}} %V16I8 = or
; SSE42: cost of 1 {{.*}} %V16I8 = or
+ ; SLM: cost of 1 {{.*}} %V16I8 = or
+ ; GLM: cost of 1 {{.*}} %V16I8 = or
; AVX: cost of 1 {{.*}} %V16I8 = or
; AVX2: cost of 1 {{.*}} %V16I8 = or
; AVX512: cost of 1 {{.*}} %V16I8 = or
%V16I8 = or <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %V32I8 = or
; SSE42: cost of 2 {{.*}} %V32I8 = or
+ ; SLM: cost of 2 {{.*}} %V32I8 = or
+ ; GLM: cost of 2 {{.*}} %V32I8 = or
; AVX: cost of 1 {{.*}} %V32I8 = or
; AVX2: cost of 1 {{.*}} %V32I8 = or
; AVX512: cost of 1 {{.*}} %V32I8 = or
%V32I8 = or <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %V64I8 = or
; SSE42: cost of 4 {{.*}} %V64I8 = or
+ ; SLM: cost of 4 {{.*}} %V64I8 = or
+ ; GLM: cost of 4 {{.*}} %V64I8 = or
; AVX: cost of 2 {{.*}} %V64I8 = or
; AVX2: cost of 2 {{.*}} %V64I8 = or
; AVX512F: cost of 2 {{.*}} %V64I8 = or
@@ -288,18 +362,24 @@ define i32 @xor(i32 %arg) {
%I64 = xor i64 undef, undef
; SSSE3: cost of 1 {{.*}} %V2I64 = xor
; SSE42: cost of 1 {{.*}} %V2I64 = xor
+ ; SLM: cost of 1 {{.*}} %V2I64 = xor
+ ; GLM: cost of 1 {{.*}} %V2I64 = xor
; AVX: cost of 1 {{.*}} %V2I64 = xor
; AVX2: cost of 1 {{.*}} %V2I64 = xor
; AVX512: cost of 1 {{.*}} %V2I64 = xor
%V2I64 = xor <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %V4I64 = xor
; SSE42: cost of 2 {{.*}} %V4I64 = xor
+ ; SLM: cost of 2 {{.*}} %V4I64 = xor
+ ; GLM: cost of 2 {{.*}} %V4I64 = xor
; AVX: cost of 1 {{.*}} %V4I64 = xor
; AVX2: cost of 1 {{.*}} %V4I64 = xor
; AVX512: cost of 1 {{.*}} %V4I64 = xor
%V4I64 = xor <4 x i64> undef, undef
; SSSE3: cost of 4 {{.*}} %V8I64 = xor
; SSE42: cost of 4 {{.*}} %V8I64 = xor
+ ; SLM: cost of 4 {{.*}} %V8I64 = xor
+ ; GLM: cost of 4 {{.*}} %V8I64 = xor
; AVX: cost of 2 {{.*}} %V8I64 = xor
; AVX2: cost of 2 {{.*}} %V8I64 = xor
; AVX512: cost of 1 {{.*}} %V8I64 = xor
@@ -309,18 +389,24 @@ define i32 @xor(i32 %arg) {
%I32 = xor i32 undef, undef
; SSSE3: cost of 1 {{.*}} %V4I32 = xor
; SSE42: cost of 1 {{.*}} %V4I32 = xor
+ ; SLM: cost of 1 {{.*}} %V4I32 = xor
+ ; GLM: cost of 1 {{.*}} %V4I32 = xor
; AVX: cost of 1 {{.*}} %V4I32 = xor
; AVX2: cost of 1 {{.*}} %V4I32 = xor
; AVX512: cost of 1 {{.*}} %V4I32 = xor
%V4I32 = xor <4 x i32> undef, undef
; SSSE3: cost of 2 {{.*}} %V8I32 = xor
; SSE42: cost of 2 {{.*}} %V8I32 = xor
+ ; SLM: cost of 2 {{.*}} %V8I32 = xor
+ ; GLM: cost of 2 {{.*}} %V8I32 = xor
; AVX: cost of 1 {{.*}} %V8I32 = xor
; AVX2: cost of 1 {{.*}} %V8I32 = xor
; AVX512: cost of 1 {{.*}} %V8I32 = xor
%V8I32 = xor <8 x i32> undef, undef
; SSSE3: cost of 4 {{.*}} %V16I32 = xor
; SSE42: cost of 4 {{.*}} %V16I32 = xor
+ ; SLM: cost of 4 {{.*}} %V16I32 = xor
+ ; GLM: cost of 4 {{.*}} %V16I32 = xor
; AVX: cost of 2 {{.*}} %V16I32 = xor
; AVX2: cost of 2 {{.*}} %V16I32 = xor
; AVX512: cost of 1 {{.*}} %V16I32 = xor
@@ -330,18 +416,24 @@ define i32 @xor(i32 %arg) {
%I16 = xor i16 undef, undef
; SSSE3: cost of 1 {{.*}} %V8I16 = xor
; SSE42: cost of 1 {{.*}} %V8I16 = xor
+ ; SLM: cost of 1 {{.*}} %V8I16 = xor
+ ; GLM: cost of 1 {{.*}} %V8I16 = xor
; AVX: cost of 1 {{.*}} %V8I16 = xor
; AVX2: cost of 1 {{.*}} %V8I16 = xor
; AVX512: cost of 1 {{.*}} %V8I16 = xor
%V8I16 = xor <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %V16I16 = xor
; SSE42: cost of 2 {{.*}} %V16I16 = xor
+ ; SLM: cost of 2 {{.*}} %V16I16 = xor
+ ; GLM: cost of 2 {{.*}} %V16I16 = xor
; AVX: cost of 1 {{.*}} %V16I16 = xor
; AVX2: cost of 1 {{.*}} %V16I16 = xor
; AVX512: cost of 1 {{.*}} %V16I16 = xor
%V16I16 = xor <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %V32I16 = xor
; SSE42: cost of 4 {{.*}} %V32I16 = xor
+ ; SLM: cost of 4 {{.*}} %V32I16 = xor
+ ; GLM: cost of 4 {{.*}} %V32I16 = xor
; AVX: cost of 2 {{.*}} %V32I16 = xor
; AVX2: cost of 2 {{.*}} %V32I16 = xor
; AVX512F: cost of 2 {{.*}} %V32I16 = xor
@@ -352,18 +444,24 @@ define i32 @xor(i32 %arg) {
%I8 = xor i8 undef, undef
; SSSE3: cost of 1 {{.*}} %V16I8 = xor
; SSE42: cost of 1 {{.*}} %V16I8 = xor
+ ; SLM: cost of 1 {{.*}} %V16I8 = xor
+ ; GLM: cost of 1 {{.*}} %V16I8 = xor
; AVX: cost of 1 {{.*}} %V16I8 = xor
; AVX2: cost of 1 {{.*}} %V16I8 = xor
; AVX512: cost of 1 {{.*}} %V16I8 = xor
%V16I8 = xor <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %V32I8 = xor
; SSE42: cost of 2 {{.*}} %V32I8 = xor
+ ; SLM: cost of 2 {{.*}} %V32I8 = xor
+ ; GLM: cost of 2 {{.*}} %V32I8 = xor
; AVX: cost of 1 {{.*}} %V32I8 = xor
; AVX2: cost of 1 {{.*}} %V32I8 = xor
; AVX512: cost of 1 {{.*}} %V32I8 = xor
%V32I8 = xor <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %V64I8 = xor
; SSE42: cost of 4 {{.*}} %V64I8 = xor
+ ; SLM: cost of 4 {{.*}} %V64I8 = xor
+ ; GLM: cost of 4 {{.*}} %V64I8 = xor
; AVX: cost of 2 {{.*}} %V64I8 = xor
; AVX2: cost of 2 {{.*}} %V64I8 = xor
; AVX512F: cost of 2 {{.*}} %V64I8 = xor
@@ -379,18 +477,24 @@ define i32 @and(i32 %arg) {
%I64 = and i64 undef, undef
; SSSE3: cost of 1 {{.*}} %V2I64 = and
; SSE42: cost of 1 {{.*}} %V2I64 = and
+ ; SLM: cost of 1 {{.*}} %V2I64 = and
+ ; GLM: cost of 1 {{.*}} %V2I64 = and
; AVX: cost of 1 {{.*}} %V2I64 = and
; AVX2: cost of 1 {{.*}} %V2I64 = and
; AVX512: cost of 1 {{.*}} %V2I64 = and
%V2I64 = and <2 x i64> undef, undef
; SSSE3: cost of 2 {{.*}} %V4I64 = and
; SSE42: cost of 2 {{.*}} %V4I64 = and
+ ; SLM: cost of 2 {{.*}} %V4I64 = and
+ ; GLM: cost of 2 {{.*}} %V4I64 = and
; AVX: cost of 1 {{.*}} %V4I64 = and
; AVX2: cost of 1 {{.*}} %V4I64 = and
; AVX512: cost of 1 {{.*}} %V4I64 = and
%V4I64 = and <4 x i64> undef, undef
; SSSE3: cost of 4 {{.*}} %V8I64 = and
; SSE42: cost of 4 {{.*}} %V8I64 = and
+ ; SLM: cost of 4 {{.*}} %V8I64 = and
+ ; GLM: cost of 4 {{.*}} %V8I64 = and
; AVX: cost of 2 {{.*}} %V8I64 = and
; AVX2: cost of 2 {{.*}} %V8I64 = and
; AVX512: cost of 1 {{.*}} %V8I64 = and
@@ -400,18 +504,24 @@ define i32 @and(i32 %arg) {
%I32 = and i32 undef, undef
; SSSE3: cost of 1 {{.*}} %V4I32 = and
; SSE42: cost of 1 {{.*}} %V4I32 = and
+ ; SLM: cost of 1 {{.*}} %V4I32 = and
+ ; GLM: cost of 1 {{.*}} %V4I32 = and
; AVX: cost of 1 {{.*}} %V4I32 = and
; AVX2: cost of 1 {{.*}} %V4I32 = and
; AVX512: cost of 1 {{.*}} %V4I32 = and
%V4I32 = and <4 x i32> undef, undef
; SSSE3: cost of 2 {{.*}} %V8I32 = and
; SSE42: cost of 2 {{.*}} %V8I32 = and
+ ; SLM: cost of 2 {{.*}} %V8I32 = and
+ ; GLM: cost of 2 {{.*}} %V8I32 = and
; AVX: cost of 1 {{.*}} %V8I32 = and
; AVX2: cost of 1 {{.*}} %V8I32 = and
; AVX512: cost of 1 {{.*}} %V8I32 = and
%V8I32 = and <8 x i32> undef, undef
; SSSE3: cost of 4 {{.*}} %V16I32 = and
; SSE42: cost of 4 {{.*}} %V16I32 = and
+ ; SLM: cost of 4 {{.*}} %V16I32 = and
+ ; GLM: cost of 4 {{.*}} %V16I32 = and
; AVX: cost of 2 {{.*}} %V16I32 = and
; AVX2: cost of 2 {{.*}} %V16I32 = and
; AVX512: cost of 1 {{.*}} %V16I32 = and
@@ -421,18 +531,24 @@ define i32 @and(i32 %arg) {
%I16 = and i16 undef, undef
; SSSE3: cost of 1 {{.*}} %V8I16 = and
; SSE42: cost of 1 {{.*}} %V8I16 = and
+ ; SLM: cost of 1 {{.*}} %V8I16 = and
+ ; GLM: cost of 1 {{.*}} %V8I16 = and
; AVX: cost of 1 {{.*}} %V8I16 = and
; AVX2: cost of 1 {{.*}} %V8I16 = and
; AVX512: cost of 1 {{.*}} %V8I16 = and
%V8I16 = and <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %V16I16 = and
; SSE42: cost of 2 {{.*}} %V16I16 = and
+ ; SLM: cost of 2 {{.*}} %V16I16 = and
+ ; GLM: cost of 2 {{.*}} %V16I16 = and
; AVX: cost of 1 {{.*}} %V16I16 = and
; AVX2: cost of 1 {{.*}} %V16I16 = and
; AVX512: cost of 1 {{.*}} %V16I16 = and
%V16I16 = and <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %V32I16 = and
; SSE42: cost of 4 {{.*}} %V32I16 = and
+ ; SLM: cost of 4 {{.*}} %V32I16 = and
+ ; GLM: cost of 4 {{.*}} %V32I16 = and
; AVX: cost of 2 {{.*}} %V32I16 = and
; AVX2: cost of 2 {{.*}} %V32I16 = and
; AVX512F: cost of 2 {{.*}} %V32I16 = and
@@ -443,18 +559,24 @@ define i32 @and(i32 %arg) {
%I8 = and i8 undef, undef
; SSSE3: cost of 1 {{.*}} %V16I8 = and
; SSE42: cost of 1 {{.*}} %V16I8 = and
+ ; SLM: cost of 1 {{.*}} %V16I8 = and
+ ; GLM: cost of 1 {{.*}} %V16I8 = and
; AVX: cost of 1 {{.*}} %V16I8 = and
; AVX2: cost of 1 {{.*}} %V16I8 = and
; AVX512: cost of 1 {{.*}} %V16I8 = and
%V16I8 = and <16 x i8> undef, undef
; SSSE3: cost of 2 {{.*}} %V32I8 = and
; SSE42: cost of 2 {{.*}} %V32I8 = and
+ ; SLM: cost of 2 {{.*}} %V32I8 = and
+ ; GLM: cost of 2 {{.*}} %V32I8 = and
; AVX: cost of 1 {{.*}} %V32I8 = and
; AVX2: cost of 1 {{.*}} %V32I8 = and
; AVX512: cost of 1 {{.*}} %V32I8 = and
%V32I8 = and <32 x i8> undef, undef
; SSSE3: cost of 4 {{.*}} %V64I8 = and
; SSE42: cost of 4 {{.*}} %V64I8 = and
+ ; SLM: cost of 4 {{.*}} %V64I8 = and
+ ; GLM: cost of 4 {{.*}} %V64I8 = and
; AVX: cost of 2 {{.*}} %V64I8 = and
; AVX2: cost of 2 {{.*}} %V64I8 = and
; AVX512F: cost of 2 {{.*}} %V64I8 = and
@@ -470,6 +592,8 @@ define i32 @mul(i32 %arg) {
%I64 = mul i64 undef, undef
; SSSE3: cost of 8 {{.*}} %V2I64 = mul
; SSE42: cost of 8 {{.*}} %V2I64 = mul
+ ; SLM: cost of 17 {{.*}} %V2I64 = mul
+ ; GLM: cost of 8 {{.*}} %V2I64 = mul
; AVX: cost of 8 {{.*}} %V2I64 = mul
; AVX2: cost of 8 {{.*}} %V2I64 = mul
; AVX512F: cost of 8 {{.*}} %V2I64 = mul
@@ -478,6 +602,8 @@ define i32 @mul(i32 %arg) {
%V2I64 = mul <2 x i64> undef, undef
; SSSE3: cost of 16 {{.*}} %V4I64 = mul
; SSE42: cost of 16 {{.*}} %V4I64 = mul
+ ; SLM: cost of 34 {{.*}} %V4I64 = mul
+ ; GLM: cost of 16 {{.*}} %V4I64 = mul
; AVX: cost of 18 {{.*}} %V4I64 = mul
; AVX2: cost of 8 {{.*}} %V4I64 = mul
; AVX512F: cost of 8 {{.*}} %V4I64 = mul
@@ -486,6 +612,8 @@ define i32 @mul(i32 %arg) {
%V4I64 = mul <4 x i64> undef, undef
; SSSE3: cost of 32 {{.*}} %V8I64 = mul
; SSE42: cost of 32 {{.*}} %V8I64 = mul
+ ; SLM: cost of 68 {{.*}} %V8I64 = mul
+ ; GLM: cost of 32 {{.*}} %V8I64 = mul
; AVX: cost of 36 {{.*}} %V8I64 = mul
; AVX2: cost of 16 {{.*}} %V8I64 = mul
; AVX512F: cost of 8 {{.*}} %V8I64 = mul
@@ -497,18 +625,24 @@ define i32 @mul(i32 %arg) {
%I32 = mul i32 undef, undef
; SSSE3: cost of 6 {{.*}} %V4I32 = mul
; SSE42: cost of 2 {{.*}} %V4I32 = mul
+ ; SLM: cost of 11 {{.*}} %V4I32 = mul
+ ; GLM: cost of 2 {{.*}} %V4I32 = mul
; AVX: cost of 2 {{.*}} %V4I32 = mul
; AVX2: cost of 2 {{.*}} %V4I32 = mul
; AVX512: cost of 1 {{.*}} %V4I32 = mul
%V4I32 = mul <4 x i32> undef, undef
; SSSE3: cost of 12 {{.*}} %V8I32 = mul
; SSE42: cost of 4 {{.*}} %V8I32 = mul
+ ; SLM: cost of 22 {{.*}} %V8I32 = mul
+ ; GLM: cost of 4 {{.*}} %V8I32 = mul
; AVX: cost of 4 {{.*}} %V8I32 = mul
; AVX2: cost of 2 {{.*}} %V8I32 = mul
; AVX512: cost of 1 {{.*}} %V8I32 = mul
%V8I32 = mul <8 x i32> undef, undef
; SSSE3: cost of 24 {{.*}} %V16I32 = mul
; SSE42: cost of 8 {{.*}} %V16I32 = mul
+ ; SLM: cost of 44 {{.*}} %V16I32 = mul
+ ; GLM: cost of 8 {{.*}} %V16I32 = mul
; AVX: cost of 8 {{.*}} %V16I32 = mul
; AVX2: cost of 4 {{.*}} %V16I32 = mul
; AVX512: cost of 1 {{.*}} %V16I32 = mul
@@ -518,18 +652,24 @@ define i32 @mul(i32 %arg) {
%I16 = mul i16 undef, undef
; SSSE3: cost of 1 {{.*}} %V8I16 = mul
; SSE42: cost of 1 {{.*}} %V8I16 = mul
+ ; SLM: cost of 2 {{.*}} %V8I16 = mul
+ ; GLM: cost of 1 {{.*}} %V8I16 = mul
; AVX: cost of 1 {{.*}} %V8I16 = mul
; AVX2: cost of 1 {{.*}} %V8I16 = mul
; AVX512: cost of 1 {{.*}} %V8I16 = mul
%V8I16 = mul <8 x i16> undef, undef
; SSSE3: cost of 2 {{.*}} %V16I16 = mul
; SSE42: cost of 2 {{.*}} %V16I16 = mul
+ ; SLM: cost of 4 {{.*}} %V16I16 = mul
+ ; GLM: cost of 2 {{.*}} %V16I16 = mul
; AVX: cost of 4 {{.*}} %V16I16 = mul
; AVX2: cost of 1 {{.*}} %V16I16 = mul
; AVX512: cost of 1 {{.*}} %V16I16 = mul
%V16I16 = mul <16 x i16> undef, undef
; SSSE3: cost of 4 {{.*}} %V32I16 = mul
; SSE42: cost of 4 {{.*}} %V32I16 = mul
+ ; SLM: cost of 8 {{.*}} %V32I16 = mul
+ ; GLM: cost of 4 {{.*}} %V32I16 = mul
; AVX: cost of 8 {{.*}} %V32I16 = mul
; AVX2: cost of 2 {{.*}} %V32I16 = mul
; AVX512F: cost of 2 {{.*}} %V32I16 = mul
@@ -540,6 +680,8 @@ define i32 @mul(i32 %arg) {
%I8 = mul i8 undef, undef
; SSSE3: cost of 12 {{.*}} %V16I8 = mul
; SSE42: cost of 12 {{.*}} %V16I8 = mul
+ ; SLM: cost of 14 {{.*}} %V16I8 = mul
+ ; GLM: cost of 12 {{.*}} %V16I8 = mul
; AVX: cost of 12 {{.*}} %V16I8 = mul
; AVX2: cost of 7 {{.*}} %V16I8 = mul
; AVX512F: cost of 5 {{.*}} %V16I8 = mul
@@ -547,6 +689,8 @@ define i32 @mul(i32 %arg) {
%V16I8 = mul <16 x i8> undef, undef
; SSSE3: cost of 24 {{.*}} %V32I8 = mul
; SSE42: cost of 24 {{.*}} %V32I8 = mul
+ ; SLM: cost of 28 {{.*}} %V32I8 = mul
+ ; GLM: cost of 24 {{.*}} %V32I8 = mul
; AVX: cost of 26 {{.*}} %V32I8 = mul
; AVX2: cost of 17 {{.*}} %V32I8 = mul
; AVX512F: cost of 13 {{.*}} %V32I8 = mul
@@ -554,6 +698,8 @@ define i32 @mul(i32 %arg) {
%V32I8 = mul <32 x i8> undef, undef
; SSSE3: cost of 48 {{.*}} %V64I8 = mul
; SSE42: cost of 48 {{.*}} %V64I8 = mul
+ ; SLM: cost of 56 {{.*}} %V64I8 = mul
+ ; GLM: cost of 48 {{.*}} %V64I8 = mul
; AVX: cost of 52 {{.*}} %V64I8 = mul
; AVX2: cost of 34 {{.*}} %V64I8 = mul
; AVX512F: cost of 26 {{.*}} %V64I8 = mul
@@ -570,6 +716,8 @@ define void @mul_2i32() {
; 3 PMULUDQ and 2 PADDS and 4 shifts.
; SSSE3: cost of 8 {{.*}} %A0 = mul
; SSE42: cost of 8 {{.*}} %A0 = mul
+ ; SLM: cost of 17 {{.*}} %A0 = mul
+ ; GLM: cost of 8 {{.*}} %A0 = mul
; AVX: cost of 8 {{.*}} %A0 = mul
; AVX2: cost of 8 {{.*}} %A0 = mul
; AVX512F: cost of 8 {{.*}} %A0 = mul
More information about the llvm-commits
mailing list