[llvm] b1c3820 - [X86] Improve costmodel for scalar byte swaps
Roman Lebedev via llvm-commits
llvm-commits at lists.llvm.org
Sat May 8 05:18:09 PDT 2021
Author: Roman Lebedev
Date: 2021-05-08T15:17:35+03:00
New Revision: b1c38207e9ca6aba883a8000239163520ee6ed83
URL: https://github.com/llvm/llvm-project/commit/b1c38207e9ca6aba883a8000239163520ee6ed83
DIFF: https://github.com/llvm/llvm-project/commit/b1c38207e9ca6aba883a8000239163520ee6ed83.diff
LOG: [X86] Improve costmodel for scalar byte swaps
Currently we model i16 bswap as having a very high cost (`10`),
which doesn't seem right, with all the others being at `1`.
Regardless of `MOVBE`, i16 reg-reg bswap is lowered into
(an extending move plus) rot-by-8:
https://godbolt.org/z/8jrq7fMTj
I think it should at worst have a throughput of `1`.
Since i32/i64 already have cost of `1`,
`MOVBE` doesn't improve their costs any further.
BUT, `MOVBE` must have at least a single memory operand,
with the other being a register. This means that, if we have
a bswap of a load, iff the load has a single use,
we'll fold the bswap into the load.
Likewise, if we have a store of a bswap, iff the bswap
has a single use, we'll fold the bswap into the store.
So I think we should treat such a bswap as free,
unless, of course, we know that `MOVBE` performs badly
on the particular CPU.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D101924
Added:
Modified:
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86Subtarget.h
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/test/Analysis/CostModel/X86/bswap-store.ll
llvm/test/Analysis/CostModel/X86/bswap.ll
llvm/test/Analysis/CostModel/X86/load-bswap.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 1af007f093830..6fe7c21754779 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -514,6 +514,10 @@ def FeatureFastVectorShiftMasks
"fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
"Prefer a left/right vector logical shift pair over a shift+and pair">;
+def FeatureFastMOVBE
+ : SubtargetFeature<"fast-movbe", "HasFastMOVBE", "true",
+ "Prefer a movbe over a single-use load + bswap / single-use bswap + store">;
+
def FeatureUseGLMDivSqrtCosts
: SubtargetFeature<"use-glm-div-sqrt-costs", "UseGLMDivSqrtCosts", "true",
"Use Goldmont specific floating point div/sqrt costs">;
@@ -820,6 +824,7 @@ def ProcessorFeatures {
FeatureSlowDivide64,
FeatureSlowPMULLD,
FeatureFast7ByteNOP,
+ FeatureFastMOVBE,
FeaturePOPCNTFalseDeps,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> SLMFeatures =
@@ -839,6 +844,7 @@ def ProcessorFeatures {
FeatureSlowTwoMemOps,
FeatureSlowLEA,
FeatureSlowIncDec,
+ FeatureFastMOVBE,
FeaturePOPCNTFalseDeps,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> GLMFeatures =
@@ -851,6 +857,7 @@ def ProcessorFeatures {
FeatureSlowTwoMemOps,
FeatureSlowLEA,
FeatureSlowIncDec,
+ FeatureFastMOVBE,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> GLPFeatures =
!listconcat(GLMFeatures, GLPAdditionalFeatures);
@@ -924,6 +931,7 @@ def ProcessorFeatures {
FeatureSlowTwoMemOps,
FeaturePreferMaskRegisters,
FeatureHasFastGather,
+ FeatureFastMOVBE,
FeatureSlowPMADDWD];
// TODO Add AVX5124FMAPS/AVX5124VNNIW features
list<SubtargetFeature> KNMFeatures =
@@ -983,6 +991,7 @@ def ProcessorFeatures {
FeatureFast15ByteNOP,
FeatureFastScalarShiftMasks,
FeatureFastVectorShiftMasks,
+ FeatureFastMOVBE,
FeatureSlowSHLD];
list<SubtargetFeature> BtVer2Features =
!listconcat(BtVer1Features, BtVer2AdditionalFeatures);
@@ -1017,7 +1026,9 @@ def ProcessorFeatures {
FeatureTBM,
FeatureFMA,
FeatureFastBEXTR];
- list<SubtargetFeature> BdVer2Tuning = BdVer1Tuning;
+ list<SubtargetFeature> BdVer2AdditionalTuning = [FeatureFastMOVBE];
+ list<SubtargetFeature> BdVer2Tuning =
+ !listconcat(BdVer1Tuning, BdVer2AdditionalTuning);
list<SubtargetFeature> BdVer2Features =
!listconcat(BdVer1Features, BdVer2AdditionalFeatures);
@@ -1077,6 +1088,7 @@ def ProcessorFeatures {
FeatureFast15ByteNOP,
FeatureBranchFusion,
FeatureFastScalarShiftMasks,
+ FeatureFastMOVBE,
FeatureSlowSHLD,
FeatureInsertVZEROUPPER];
list<SubtargetFeature> ZN2AdditionalFeatures = [FeatureCLWB,
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 96bb960605432..e6a5be7cf63ca 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -433,6 +433,9 @@ class X86Subtarget final : public X86GenSubtargetInfo {
/// Prefer a left/right vector logical shifts pair over a shift+and pair.
bool HasFastVectorShiftMasks = false;
+ /// Prefer a movbe over a single-use load + bswap / single-use bswap + store.
+ bool HasFastMOVBE = false;
+
/// Use a retpoline thunk rather than indirect calls to block speculative
/// execution.
bool UseRetpolineIndirectCalls = false;
@@ -714,6 +717,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
bool hasFastScalarShiftMasks() const { return HasFastScalarShiftMasks; }
bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
+ bool hasFastMOVBE() const { return HasFastMOVBE; }
bool hasMacroFusion() const { return HasMacroFusion; }
bool hasBranchFusion() const { return HasBranchFusion; }
bool hasERMSB() const { return HasERMSB; }
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 62401ee22aecf..eea1efd6a80a7 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -2695,6 +2695,7 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
static const CostTblEntry X64CostTbl[] = { // 64-bit targets
{ ISD::ABS, MVT::i64, 2 }, // SUB+CMOV
{ ISD::BITREVERSE, MVT::i64, 14 },
+ { ISD::BSWAP, MVT::i64, 1 },
{ ISD::CTLZ, MVT::i64, 4 }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTTZ, MVT::i64, 3 }, // TEST+BSF+CMOV/BRANCH
{ ISD::CTPOP, MVT::i64, 10 },
@@ -2708,6 +2709,8 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
{ ISD::BITREVERSE, MVT::i32, 14 },
{ ISD::BITREVERSE, MVT::i16, 14 },
{ ISD::BITREVERSE, MVT::i8, 11 },
+ { ISD::BSWAP, MVT::i32, 1 },
+ { ISD::BSWAP, MVT::i16, 1 }, // ROL
{ ISD::CTLZ, MVT::i32, 4 }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i16, 4 }, // BSR+XOR or BSR+XOR+CMOV
{ ISD::CTLZ, MVT::i8, 4 }, // BSR+XOR or BSR+XOR+CMOV
@@ -2919,6 +2922,17 @@ X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return adjustTableCost(*Entry, LT.first, ICA.getFlags());
}
+ if (ISD == ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) {
+ if (const Instruction *II = ICA.getInst()) {
+ if (II->hasOneUse() && isa<StoreInst>(II->user_back()))
+ return TTI::TCC_Free;
+ if (auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) {
+ if (LI->hasOneUse())
+ return TTI::TCC_Free;
+ }
+ }
+ }
+
// TODO - add BMI (TZCNT) scalar handling
if (ST->is64Bit())
diff --git a/llvm/test/Analysis/CostModel/X86/bswap-store.ll b/llvm/test/Analysis/CostModel/X86/bswap-store.ll
index 2cc21a721004c..1cdf083ba3568 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap-store.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap-store.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,NOMOVBE,X64
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,MOVBE,X64
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,NOMOVBE,X86
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,MOVBE,X86
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,X64
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,X64-MOVBE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe,+fast-movbe | FileCheck %s --check-prefixes=ALL,X64-FASTMOVBE
+
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,X32
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,X32-MOVBE
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe,+fast-movbe | FileCheck %s --check-prefixes=ALL,X32-FASTMOVBE
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
@@ -10,15 +13,35 @@ declare i64 @llvm.bswap.i64(i64)
declare i128 @llvm.bswap.i128(i128)
define void @var_bswap_store_i16(i16 %a, i16* %dst) {
-; NOMOVBE-LABEL: 'var_bswap_store_i16'
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X64-LABEL: 'var_bswap_store_i16'
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-MOVBE-LABEL: 'var_bswap_store_i16'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-FASTMOVBE-LABEL: 'var_bswap_store_i16'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
-; MOVBE-LABEL: 'var_bswap_store_i16'
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X32-LABEL: 'var_bswap_store_i16'
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-MOVBE-LABEL: 'var_bswap_store_i16'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-FASTMOVBE-LABEL: 'var_bswap_store_i16'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i16 @llvm.bswap.i16(i16 %a)
store i16 %bswap, i16* %dst, align 1
@@ -26,17 +49,11 @@ define void @var_bswap_store_i16(i16 %a, i16* %dst) {
ret void
}
define void @var_bswap_store_i16_extrause(i16 %a, i16* %dst) {
-; NOMOVBE-LABEL: 'var_bswap_store_i16_extrause'
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i16 %bswap, 2
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
-;
-; MOVBE-LABEL: 'var_bswap_store_i16_extrause'
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i16 %bswap, 2
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; ALL-LABEL: 'var_bswap_store_i16_extrause'
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %bswap, i16* %dst, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i16 %bswap, 2
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i16 @llvm.bswap.i16(i16 %a)
store i16 %bswap, i16* %dst, align 1
@@ -47,10 +64,35 @@ define void @var_bswap_store_i16_extrause(i16 %a, i16* %dst) {
}
define void @var_bswap_store_i32(i32 %a, i32* %dst) {
-; ALL-LABEL: 'var_bswap_store_i32'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X64-LABEL: 'var_bswap_store_i32'
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-MOVBE-LABEL: 'var_bswap_store_i32'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-FASTMOVBE-LABEL: 'var_bswap_store_i32'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-LABEL: 'var_bswap_store_i32'
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-MOVBE-LABEL: 'var_bswap_store_i32'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-FASTMOVBE-LABEL: 'var_bswap_store_i32'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i32 %bswap, i32* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i32 @llvm.bswap.i32(i32 %a)
store i32 %bswap, i32* %dst, align 1
@@ -78,10 +120,30 @@ define void @var_bswap_store_i64(i64 %a, i64* %dst) {
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
-; X86-LABEL: 'var_bswap_store_i64'
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X64-MOVBE-LABEL: 'var_bswap_store_i64'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-FASTMOVBE-LABEL: 'var_bswap_store_i64'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-LABEL: 'var_bswap_store_i64'
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-MOVBE-LABEL: 'var_bswap_store_i64'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-FASTMOVBE-LABEL: 'var_bswap_store_i64'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i64 @llvm.bswap.i64(i64 %a)
store i64 %bswap, i64* %dst, align 1
@@ -95,11 +157,35 @@ define void @var_bswap_store_i64_extrause(i64 %a, i64* %dst) {
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i64 %bswap, 2
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
-; X86-LABEL: 'var_bswap_store_i64_extrause'
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X64-MOVBE-LABEL: 'var_bswap_store_i64_extrause'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i64 %bswap, 2
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-FASTMOVBE-LABEL: 'var_bswap_store_i64_extrause'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %bswap, i64* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap2 = shl i64 %bswap, 2
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-LABEL: 'var_bswap_store_i64_extrause'
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-MOVBE-LABEL: 'var_bswap_store_i64_extrause'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-FASTMOVBE-LABEL: 'var_bswap_store_i64_extrause'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %bswap, i64* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i64 %bswap, 2
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i64 @llvm.bswap.i64(i64 %a)
store i64 %bswap, i64* %dst, align 1
@@ -111,14 +197,34 @@ define void @var_bswap_store_i64_extrause(i64 %a, i64* %dst) {
define void @var_bswap_store_i128(i128 %a, i128* %dst) {
; X64-LABEL: 'var_bswap_store_i128'
-; X64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
-; X86-LABEL: 'var_bswap_store_i128'
-; X86-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X64-MOVBE-LABEL: 'var_bswap_store_i128'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-FASTMOVBE-LABEL: 'var_bswap_store_i128'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-LABEL: 'var_bswap_store_i128'
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-MOVBE-LABEL: 'var_bswap_store_i128'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-FASTMOVBE-LABEL: 'var_bswap_store_i128'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i128 @llvm.bswap.i128(i128 %a)
store i128 %bswap, i128* %dst, align 1
@@ -127,16 +233,40 @@ define void @var_bswap_store_i128(i128 %a, i128* %dst) {
}
define void @var_bswap_store_i128_extrause(i128 %a, i128* %dst) {
; X64-LABEL: 'var_bswap_store_i128_extrause'
-; X64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i128 %bswap, 2
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
-; X86-LABEL: 'var_bswap_store_i128_extrause'
-; X86-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+; X64-MOVBE-LABEL: 'var_bswap_store_i128_extrause'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i128 %bswap, 2
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X64-FASTMOVBE-LABEL: 'var_bswap_store_i128_extrause'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %bswap, i128* %dst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap2 = shl i128 %bswap, 2
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-LABEL: 'var_bswap_store_i128_extrause'
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-MOVBE-LABEL: 'var_bswap_store_i128_extrause'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
+;
+; X32-FASTMOVBE-LABEL: 'var_bswap_store_i128_extrause'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %bswap, i128* %dst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap2 = shl i128 %bswap, 2
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
%bswap = call i128 @llvm.bswap.i128(i128 %a)
store i128 %bswap, i128* %dst, align 1
diff --git a/llvm/test/Analysis/CostModel/X86/bswap.ll b/llvm/test/Analysis/CostModel/X86/bswap.ll
index 940838d8fdc97..5ed8e4a039594 100644
--- a/llvm/test/Analysis/CostModel/X86/bswap.ll
+++ b/llvm/test/Analysis/CostModel/X86/bswap.ll
@@ -1,8 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,NOMOVBE,X64
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,MOVBE,X64
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,NOMOVBE,X86
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,MOVBE,X86
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,X64
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,X64
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,X86
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,X86
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
@@ -12,13 +12,9 @@ declare i128 @llvm.bswap.i128(i128)
; Verify the cost of scalar bswap instructions.
define i16 @var_bswap_i16(i16 %a) {
-; NOMOVBE-LABEL: 'var_bswap_i16'
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
-;
-; MOVBE-LABEL: 'var_bswap_i16'
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+; ALL-LABEL: 'var_bswap_i16'
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
%bswap = call i16 @llvm.bswap.i16(i16 %a)
ret i16 %bswap
@@ -39,7 +35,7 @@ define i64 @var_bswap_i64(i64 %a) {
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
; X86-LABEL: 'var_bswap_i64'
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
%bswap = call i64 @llvm.bswap.i64(i64 %a)
@@ -48,11 +44,11 @@ define i64 @var_bswap_i64(i64 %a) {
define i128 @var_bswap_i128(i128 %a) {
; X64-LABEL: 'var_bswap_i128'
-; X64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
; X86-LABEL: 'var_bswap_i128'
-; X86-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
%bswap = call i128 @llvm.bswap.i128(i128 %a)
diff --git a/llvm/test/Analysis/CostModel/X86/load-bswap.ll b/llvm/test/Analysis/CostModel/X86/load-bswap.ll
index 8f524b6494104..9501a1abdcc1a 100644
--- a/llvm/test/Analysis/CostModel/X86/load-bswap.ll
+++ b/llvm/test/Analysis/CostModel/X86/load-bswap.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,NOMOVBE,X64
-; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,MOVBE,X64
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,NOMOVBE,X86
-; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,MOVBE,X86
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,X64
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,X64-MOVBE
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -cost-model -analyze -mattr=+movbe,+fast-movbe | FileCheck %s --check-prefixes=ALL,X64-FASTMOVBE
+
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze | FileCheck %s --check-prefixes=ALL,X32
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe | FileCheck %s --check-prefixes=ALL,X32-MOVBE
+; RUN: opt < %s -mtriple=i686-unknown-linux-gnu -cost-model -analyze -mattr=+movbe,+fast-movbe | FileCheck %s --check-prefixes=ALL,X32-FASTMOVBE
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
@@ -10,15 +13,35 @@ declare i64 @llvm.bswap.i64(i64)
declare i128 @llvm.bswap.i128(i128)
define i16 @var_load_bswap_i16(i16* %src) {
-; NOMOVBE-LABEL: 'var_load_bswap_i16'
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+; X64-LABEL: 'var_load_bswap_i16'
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+;
+; X64-MOVBE-LABEL: 'var_load_bswap_i16'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+;
+; X64-FASTMOVBE-LABEL: 'var_load_bswap_i16'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+;
+; X32-LABEL: 'var_load_bswap_i16'
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
-; MOVBE-LABEL: 'var_load_bswap_i16'
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+; X32-MOVBE-LABEL: 'var_load_bswap_i16'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+;
+; X32-FASTMOVBE-LABEL: 'var_load_bswap_i16'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
%a = load i16, i16* %src, align 1
%bswap = call i16 @llvm.bswap.i16(i16 %a)
@@ -26,19 +49,12 @@ define i16 @var_load_bswap_i16(i16* %src) {
ret i16 %bswap
}
define i16 @var_load_bswap_i16_extrause(i16* %src, i16* %clobberdst) {
-; NOMOVBE-LABEL: 'var_load_bswap_i16_extrause'
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i16 %a, 2
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %a2, i16* %clobberdst, align 1
-; NOMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
-;
-; MOVBE-LABEL: 'var_load_bswap_i16_extrause'
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i16 %a, 2
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %a2, i16* %clobberdst, align 1
-; MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
+; ALL-LABEL: 'var_load_bswap_i16_extrause'
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i16, i16* %src, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i16 @llvm.bswap.i16(i16 %a)
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i16 %a, 2
+; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i16 %a2, i16* %clobberdst, align 1
+; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i16 %bswap
;
%a = load i16, i16* %src, align 1
%bswap = call i16 @llvm.bswap.i16(i16 %a)
@@ -50,10 +66,35 @@ define i16 @var_load_bswap_i16_extrause(i16* %src, i16* %clobberdst) {
}
define i32 @var_load_bswap_i32(i32* %src) {
-; ALL-LABEL: 'var_load_bswap_i32'
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
-; ALL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
-; ALL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+; X64-LABEL: 'var_load_bswap_i32'
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+;
+; X64-MOVBE-LABEL: 'var_load_bswap_i32'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+;
+; X64-FASTMOVBE-LABEL: 'var_load_bswap_i32'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+;
+; X32-LABEL: 'var_load_bswap_i32'
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+;
+; X32-MOVBE-LABEL: 'var_load_bswap_i32'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
+;
+; X32-FASTMOVBE-LABEL: 'var_load_bswap_i32'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i32, i32* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i32 @llvm.bswap.i32(i32 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 %bswap
;
%a = load i32, i32* %src, align 1
%bswap = call i32 @llvm.bswap.i32(i32 %a)
@@ -83,10 +124,30 @@ define i64 @var_load_bswap_i64(i64* %src) {
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
-; X86-LABEL: 'var_load_bswap_i64'
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+; X64-MOVBE-LABEL: 'var_load_bswap_i64'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X64-FASTMOVBE-LABEL: 'var_load_bswap_i64'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X32-LABEL: 'var_load_bswap_i64'
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X32-MOVBE-LABEL: 'var_load_bswap_i64'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X32-FASTMOVBE-LABEL: 'var_load_bswap_i64'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
%a = load i64, i64* %src, align 1
%bswap = call i64 @llvm.bswap.i64(i64 %a)
@@ -101,12 +162,40 @@ define i64 @var_load_bswap_i64_extrause(i64* %src, i64* %clobberdst) {
; X64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, i64* %clobberdst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
-; X86-LABEL: 'var_load_bswap_i64_extrause'
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
-; X86-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+; X64-MOVBE-LABEL: 'var_load_bswap_i64_extrause'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i64 %a, 2
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X64-FASTMOVBE-LABEL: 'var_load_bswap_i64_extrause'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a = load i64, i64* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %a2 = shl i64 %a, 2
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X32-LABEL: 'var_load_bswap_i64_extrause'
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
+; X32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X32-MOVBE-LABEL: 'var_load_bswap_i64_extrause'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
+;
+; X32-FASTMOVBE-LABEL: 'var_load_bswap_i64_extrause'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i64, i64* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i64 @llvm.bswap.i64(i64 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i64 %a, 2
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i64 %a2, i64* %clobberdst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i64 %bswap
;
%a = load i64, i64* %src, align 1
%bswap = call i64 @llvm.bswap.i64(i64 %a)
@@ -120,13 +209,33 @@ define i64 @var_load_bswap_i64_extrause(i64* %src, i64* %clobberdst) {
define i128 @var_load_bswap_i128(i128* %src) {
; X64-LABEL: 'var_load_bswap_i128'
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
-; X64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
-; X86-LABEL: 'var_load_bswap_i128'
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+; X64-MOVBE-LABEL: 'var_load_bswap_i128'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X64-FASTMOVBE-LABEL: 'var_load_bswap_i128'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X32-LABEL: 'var_load_bswap_i128'
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X32-MOVBE-LABEL: 'var_load_bswap_i128'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X32-FASTMOVBE-LABEL: 'var_load_bswap_i128'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
%a = load i128, i128* %src, align 1
%bswap = call i128 @llvm.bswap.i128(i128 %a)
@@ -136,17 +245,45 @@ define i128 @var_load_bswap_i128(i128* %src) {
define i128 @var_load_bswap_i128_extrause(i128* %src, i128* %clobberdst) {
; X64-LABEL: 'var_load_bswap_i128_extrause'
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
-; X64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i128 %a, 2
; X64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, i128* %clobberdst, align 1
; X64-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
-; X86-LABEL: 'var_load_bswap_i128_extrause'
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
-; X86-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
-; X86-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+; X64-MOVBE-LABEL: 'var_load_bswap_i128_extrause'
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i128 %a, 2
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X64-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X64-FASTMOVBE-LABEL: 'var_load_bswap_i128_extrause'
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a = load i128, i128* %src, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %a2 = shl i128 %a, 2
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X64-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X32-LABEL: 'var_load_bswap_i128_extrause'
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
+; X32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X32-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X32-MOVBE-LABEL: 'var_load_bswap_i128_extrause'
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X32-MOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
+;
+; X32-FASTMOVBE-LABEL: 'var_load_bswap_i128_extrause'
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a = load i128, i128* %src, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %bswap = call i128 @llvm.bswap.i128(i128 %a)
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %a2 = shl i128 %a, 2
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: store i128 %a2, i128* %clobberdst, align 1
+; X32-FASTMOVBE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i128 %bswap
;
%a = load i128, i128* %src, align 1
%bswap = call i128 @llvm.bswap.i128(i128 %a)
More information about the llvm-commits mailing list