[llvm] [RISCV] Add +optimized-nfN-segment-load-store (PR #114414)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 31 10:03:10 PDT 2024
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/114414
>From 16f895c4fc79cd7fcb27ef995352100e7ea980d6 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 31 Oct 2024 23:04:06 +0800
Subject: [PATCH 1/2] [RISCV] Add +optimized-nf{3,4}-segment-load-store
This is a follow up to #111511, where after benchmarking we learnt that the Banana Pi F3 has fast segmented loads for not just NF=2, but also NF=3 and NF=4: https://github.com/preames/bp3-microarch#vlseg_lmul_x_sew_throughput
This adds a tuning feature to allow these segment loads and stores to be costed cheaper and enables it for the spacemit-x60.
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 7 ++
llvm/lib/Target/RISCV/RISCVProcessors.td | 4 +-
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 7 +-
.../LoopVectorize/RISCV/interleaved-cost.ll | 70 +++++++++++++------
4 files changed, 62 insertions(+), 26 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 1e4bf1b8830bcc..719a39944c6836 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1386,6 +1386,13 @@ def TuneOptimizedZeroStrideLoad
"true", "Optimized (perform fewer memory operations)"
"zero-stride vector load">;
+foreach nf = {3-4} in
+ def TuneOptimizedNF#nf#SegmentLoadStore :
+ SubtargetFeature<"optimized-nf"#nf#"-segment-load-store",
+ "HasOptimizedNF"#nf#"SegmentLoadStore",
+ "true", "vlseg"#nf#"eN.v and vsseg"#nf#"eN.v are"
+ "implemented as a wide memory op and shuffle">;
+
def Experimental
: SubtargetFeature<"experimental", "HasExperimental",
"true", "Experimental intrinsics">;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 364aa35f09453b..c73bd5e6a1f7ce 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -472,7 +472,9 @@ def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60",
FeatureStdExtZvfh,
FeatureStdExtZvkt,
FeatureStdExtZvl256b]),
- [TuneDLenFactor2]>;
+ [TuneDLenFactor2,
+ TuneOptimizedNF3SegmentLoadStore,
+ TuneOptimizedNF4SegmentLoadStore]>;
def RP2350_HAZARD3 : RISCVProcessorModel<"rp2350-hazard3",
NoSchedModel,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index f050fb569946d6..936e0a9bfdffcc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -738,8 +738,11 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
AddressSpace, DL)) {
// Most available hardware today optimizes NF=2 as as one wide memory op
- // + Factor * LMUL shuffle ops.
- if (Factor == 2) {
+ // + Factor * LMUL shuffle ops. Some processors may also optimize NF=3
+ // and NF=4.
+ if (Factor == 2 ||
+ (Factor == 3 && ST->hasOptimizedNF3SegmentLoadStore()) ||
+ (Factor == 4 && ST->hasOptimizedNF4SegmentLoadStore())) {
InstructionCost Cost =
getMemoryOpCost(Opcode, VTy, Alignment, AddressSpace, CostKind);
MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
index 6477f14e3c6984..b5f26dd001f884 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
@@ -1,5 +1,7 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NO-OPT
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf3-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPT-NF3
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf4-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPT-NF4
%i8.2 = type {i8, i8}
define void @i8_factor_2(ptr %data, i64 %n) {
@@ -48,17 +50,28 @@ for.end:
define void @i8_factor_3(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_3'
-; CHECK: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
-; CHECK: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
-; CHECK: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
-; CHECK: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
-; CHECK: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
-; CHECK: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
-; CHECK: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
-; CHECK: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
-; CHECK: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
-; CHECK: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; OPT-NF3-LABEL: Checking a loop in 'i8_factor_3'
+; OPT-NF3: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; OPT-NF3: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; OPT-NF3: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; OPT-NF3: Cost of 4 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; OPT-NF3: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; OPT-NF3: Cost of 5 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; OPT-NF3: Cost of 7 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; OPT-NF3: Cost of 7 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; OPT-NF3: Cost of 14 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; OPT-NF3: Cost of 14 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_3'
+; NO-OPT: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; NO-OPT: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; NO-OPT: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; NO-OPT: Cost of 12 for VF 4: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; NO-OPT: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; NO-OPT: Cost of 24 for VF 8: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; NO-OPT: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; NO-OPT: Cost of 48 for VF 16: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
+; NO-OPT: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at %l0, ir<%p0>
+; NO-OPT: Cost of 96 for VF 32: INTERLEAVE-GROUP with factor 3 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0
@@ -85,17 +98,28 @@ for.end:
define void @i8_factor_4(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_4'
-; CHECK: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
-; CHECK: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
-; CHECK: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
-; CHECK: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
-; CHECK: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
-; CHECK: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
-; CHECK: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
-; CHECK: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
-; CHECK: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
-; CHECK: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; OPT-NF4-LABEL: Checking a loop in 'i8_factor_4'
+; OPT-NF4: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; OPT-NF4: Cost of 5 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; OPT-NF4: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; OPT-NF4: Cost of 5 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; OPT-NF4: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; OPT-NF4: Cost of 6 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; OPT-NF4: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; OPT-NF4: Cost of 8 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; OPT-NF4: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; OPT-NF4: Cost of 16 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_4'
+; NO-OPT: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; NO-OPT: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; NO-OPT: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; NO-OPT: Cost of 16 for VF 4: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; NO-OPT: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; NO-OPT: Cost of 32 for VF 8: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; NO-OPT: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; NO-OPT: Cost of 64 for VF 16: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
+; NO-OPT: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at %l0, ir<%p0>
+; NO-OPT: Cost of 128 for VF 32: INTERLEAVE-GROUP with factor 4 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0
>From 3984711dbd773682e1afcf000ff13ea327bb3434 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 1 Nov 2024 01:02:25 +0800
Subject: [PATCH 2/2] Add other NFs, including NF2. Add NF2 to the generic
processor model
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 2 +-
llvm/lib/Target/RISCV/RISCVProcessors.td | 12 +-
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 31 ++-
.../LoopVectorize/RISCV/interleaved-cost.ll | 182 ++++++++++++------
4 files changed, 157 insertions(+), 70 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 719a39944c6836..f2e661f007d115 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1386,7 +1386,7 @@ def TuneOptimizedZeroStrideLoad
"true", "Optimized (perform fewer memory operations)"
"zero-stride vector load">;
-foreach nf = {3-4} in
+foreach nf = {2-8} in
def TuneOptimizedNF#nf#SegmentLoadStore :
SubtargetFeature<"optimized-nf"#nf#"-segment-load-store",
"HasOptimizedNF"#nf#"SegmentLoadStore",
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index c73bd5e6a1f7ce..5277752a38ad9e 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -57,15 +57,19 @@ class RISCVTuneProcessorModel<string n,
list<SubtargetFeature> f = []>
: ProcessorModel<n, m, f,tunef>;
+defvar GenericTuneFeatures = [TuneOptimizedNF2SegmentLoadStore];
+
def GENERIC_RV32 : RISCVProcessorModel<"generic-rv32",
NoSchedModel,
[Feature32Bit,
- FeatureStdExtI]>,
+ FeatureStdExtI],
+ GenericTuneFeatures>,
GenericTuneInfo;
def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64",
NoSchedModel,
[Feature64Bit,
- FeatureStdExtI]>,
+ FeatureStdExtI],
+ GenericTuneFeatures>,
GenericTuneInfo;
// Support generic for compatibility with other targets. The triple will be used
// to change to the appropriate rv32/rv64 version.
@@ -221,7 +225,8 @@ def SIFIVE_U74 : RISCVProcessorModel<"sifive-u74",
defvar SiFiveX280TuneFeatures = !listconcat(SiFive7TuneFeatures,
[TuneDLenFactor2,
- TuneOptimizedZeroStrideLoad]);
+ TuneOptimizedZeroStrideLoad,
+ TuneOptimizedNF2SegmentLoadStore]);
def SIFIVE_X280 : RISCVProcessorModel<"sifive-x280", SiFive7Model,
[Feature64Bit,
FeatureStdExtI,
@@ -473,6 +478,7 @@ def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60",
FeatureStdExtZvkt,
FeatureStdExtZvl256b]),
[TuneDLenFactor2,
+ TuneOptimizedNF2SegmentLoadStore,
TuneOptimizedNF3SegmentLoadStore,
TuneOptimizedNF4SegmentLoadStore]>;
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 936e0a9bfdffcc..b7a559b8ba39ad 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -716,6 +716,28 @@ RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}
+static bool hasOptimizedSegmentLoadStore(unsigned NF,
+ const RISCVSubtarget *ST) {
+ switch (NF) {
+ case 2:
+ return ST->hasOptimizedNF2SegmentLoadStore();
+ case 3:
+ return ST->hasOptimizedNF3SegmentLoadStore();
+ case 4:
+ return ST->hasOptimizedNF4SegmentLoadStore();
+ case 5:
+ return ST->hasOptimizedNF5SegmentLoadStore();
+ case 6:
+ return ST->hasOptimizedNF6SegmentLoadStore();
+ case 7:
+ return ST->hasOptimizedNF7SegmentLoadStore();
+ case 8:
+ return ST->hasOptimizedNF8SegmentLoadStore();
+ default:
+ llvm_unreachable("Unexpected NF");
+ }
+}
+
InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
@@ -737,12 +759,9 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
TLI->isLegalInterleavedAccessType(SubVecTy, Factor, Alignment,
AddressSpace, DL)) {
- // Most available hardware today optimizes NF=2 as as one wide memory op
- // + Factor * LMUL shuffle ops. Some processors may also optimize NF=3
- // and NF=4.
- if (Factor == 2 ||
- (Factor == 3 && ST->hasOptimizedNF3SegmentLoadStore()) ||
- (Factor == 4 && ST->hasOptimizedNF4SegmentLoadStore())) {
+ // Some processors optimize segment loads/stores as one wide memory op +
+ // Factor * LMUL shuffle ops.
+ if (hasOptimizedSegmentLoadStore(Factor, ST)) {
InstructionCost Cost =
getMemoryOpCost(Opcode, VTy, Alignment, AddressSpace, CostKind);
MVT SubVecVT = getTLI()->getValueType(DL, SubVecTy).getSimpleVT();
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
index b5f26dd001f884..d6f16bfcba1af4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
@@ -1,33 +1,59 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,NO-OPT
-; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf3-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPT-NF3
-; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf4-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,OPT-NF4
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,-optimized-nf2-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=NO-OPT
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF2
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf3-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF3
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf4-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF4
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf5-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF5
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf6-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF6
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf7-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF7
+; RUN: opt -passes=loop-vectorize -mtriple=riscv64 -mattr=+v,+optimized-nf8-segment-load-store -debug-only=loop-vectorize -disable-output < %s 2>&1 | FileCheck %s --check-prefix=OPT-NF8
%i8.2 = type {i8, i8}
define void @i8_factor_2(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_2'
-; CHECK: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
-; CHECK: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
-; CHECK: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2-LABEL: Checking a loop in 'i8_factor_2'
+; OPT-NF2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 3 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 3 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 3 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 4 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 8 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 3 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 3 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 3 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 4 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; OPT-NF2: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; OPT-NF2: Cost of 8 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_2'
+; NO-OPT: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 4 for VF 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 16 for VF 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 32 for VF 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 64 for VF 32: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 64 for VF 32: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 4 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 4 for VF vscale x 1: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 8 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 8 for VF vscale x 2: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 16 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 16 for VF vscale x 4: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 32 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 32 for VF vscale x 8: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
+; NO-OPT: Cost of 64 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at %l0, ir<%p0>
+; NO-OPT: Cost of 64 for VF vscale x 16: INTERLEAVE-GROUP with factor 2 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
@@ -150,15 +176,24 @@ for.end:
define void @i8_factor_5(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_5'
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
-; CHECK: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
-; CHECK: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
-; CHECK: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
-; CHECK: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
-; CHECK: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
-; CHECK: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
-; CHECK: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; OPT-NF5-LABEL: Checking a loop in 'i8_factor_5'
+; OPT-NF5: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; OPT-NF5: Cost of 6 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; OPT-NF5: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; OPT-NF5: Cost of 7 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; OPT-NF5: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; OPT-NF5: Cost of 9 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; OPT-NF5: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; OPT-NF5: Cost of 13 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_5'
+; NO-OPT: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; NO-OPT: Cost of 10 for VF 2: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; NO-OPT: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; NO-OPT: Cost of 20 for VF 4: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; NO-OPT: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; NO-OPT: Cost of 40 for VF 8: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
+; NO-OPT: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at %l0, ir<%p0>
+; NO-OPT: Cost of 80 for VF 16: INTERLEAVE-GROUP with factor 5 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.5, ptr %data, i64 %i, i32 0
@@ -193,15 +228,24 @@ for.end:
define void @i8_factor_6(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_6'
-; CHECK: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
-; CHECK: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
-; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
-; CHECK: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
-; CHECK: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
-; CHECK: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
-; CHECK: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
-; CHECK: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; OPT-NF6-LABEL: Checking a loop in 'i8_factor_6'
+; OPT-NF6: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; OPT-NF6: Cost of 7 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; OPT-NF6: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; OPT-NF6: Cost of 8 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; OPT-NF6: Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; OPT-NF6: Cost of 10 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; OPT-NF6: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; OPT-NF6: Cost of 14 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_6'
+; NO-OPT: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; NO-OPT: Cost of 12 for VF 2: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; NO-OPT: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; NO-OPT: Cost of 24 for VF 4: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; NO-OPT: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; NO-OPT: Cost of 48 for VF 8: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
+; NO-OPT: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at %l0, ir<%p0>
+; NO-OPT: Cost of 96 for VF 16: INTERLEAVE-GROUP with factor 6 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.6, ptr %data, i64 %i, i32 0
@@ -240,15 +284,24 @@ for.end:
define void @i8_factor_7(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_7'
-; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
-; CHECK: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
-; CHECK: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
-; CHECK: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
-; CHECK: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
-; CHECK: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
-; CHECK: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
-; CHECK: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; OPT-NF7-LABEL: Checking a loop in 'i8_factor_7'
+; OPT-NF7: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; OPT-NF7: Cost of 8 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; OPT-NF7: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; OPT-NF7: Cost of 9 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; OPT-NF7: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; OPT-NF7: Cost of 11 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; OPT-NF7: Cost of 15 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; OPT-NF7: Cost of 15 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_7'
+; NO-OPT: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; NO-OPT: Cost of 14 for VF 2: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; NO-OPT: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; NO-OPT: Cost of 28 for VF 4: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; NO-OPT: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; NO-OPT: Cost of 56 for VF 8: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
+; NO-OPT: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at %l0, ir<%p0>
+; NO-OPT: Cost of 112 for VF 16: INTERLEAVE-GROUP with factor 7 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.7, ptr %data, i64 %i, i32 0
@@ -291,15 +344,24 @@ for.end:
define void @i8_factor_8(ptr %data, i64 %n) {
entry:
br label %for.body
-; CHECK-LABEL: Checking a loop in 'i8_factor_8'
-; CHECK: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
-; CHECK: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
-; CHECK: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
-; CHECK: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
-; CHECK: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
-; CHECK: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
-; CHECK: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
-; CHECK: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; OPT-NF8-LABEL: Checking a loop in 'i8_factor_8'
+; OPT-NF8: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; OPT-NF8: Cost of 9 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; OPT-NF8: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; OPT-NF8: Cost of 10 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; OPT-NF8: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; OPT-NF8: Cost of 12 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; OPT-NF8: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; OPT-NF8: Cost of 16 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; NO-OPT-LABEL: Checking a loop in 'i8_factor_8'
+; NO-OPT: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; NO-OPT: Cost of 16 for VF 2: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; NO-OPT: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; NO-OPT: Cost of 32 for VF 4: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; NO-OPT: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; NO-OPT: Cost of 64 for VF 8: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
+; NO-OPT: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at %l0, ir<%p0>
+; NO-OPT: Cost of 128 for VF 16: INTERLEAVE-GROUP with factor 8 at <badref>, ir<%p0>
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.8, ptr %data, i64 %i, i32 0
More information about the llvm-commits
mailing list