[llvm] 971a450 - [RISCV] Model vlseg/vsseg in interleaved memory ops
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 4 07:05:20 PDT 2023
Author: Luke Lau
Date: 2023-04-04T15:05:14+01:00
New Revision: 971a4501f7f2017816e3365f0008418fd978a9e7
URL: https://github.com/llvm/llvm-project/commit/971a4501f7f2017816e3365f0008418fd978a9e7
DIFF: https://github.com/llvm/llvm-project/commit/971a4501f7f2017816e3365f0008418fd978a9e7.diff
LOG: [RISCV] Model vlseg/vsseg in interleaved memory ops
If the legalized type is a legal interleaved access type (i.e. there's a
supported vlseg/vsseg instruction for it), the interleaved access pass
will pick any interleaved memory op (wide load + shuffles) and lower it
into a vlseg/vsseg intrinsic.
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D146522
Added:
Modified:
llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 72f2d67369d89..e90d5d8f873ee 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -436,6 +436,25 @@ InstructionCost RISCVTTIImpl::getInterleavedMemoryOpCost(
getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, CostKind);
unsigned VF = FVTy->getNumElements() / Factor;
+ // Then interleaved memory access pass with lower interleaved memory ops (i.e
+ // a load and store followed by a specific shuffle) to vlseg/vsseg
+ // intrinsics. In those cases then we can treat it as if it's just one (legal)
+ // memory op
+ if (!UseMaskForCond && !UseMaskForGaps &&
+ Factor <= TLI->getMaxSupportedInterleaveFactor()) {
+ std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(FVTy);
+ // Need to make sure type has't been scalarized
+ if (LT.second.isFixedLengthVector()) {
+ auto *LegalFVTy = FixedVectorType::get(FVTy->getElementType(),
+ LT.second.getVectorNumElements());
+ if (TLI->isLegalInterleavedAccessType(LegalFVTy, Factor, DL)) {
+ InstructionCost LegalMemCost = getMemoryOpCost(
+ Opcode, LegalFVTy, Alignment, AddressSpace, CostKind);
+ return LT.first + LegalMemCost;
+ }
+ }
+ }
+
// An interleaved load will look like this for Factor=3:
// %wide.vec = load <12 x i32>, ptr %3, align 4
// %strided.vec = shufflevector %wide.vec, poison, <4 x i32> <stride mask>
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
index 8d51c4f284dc8..9fb2510344402 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-accesses.ll
@@ -152,54 +152,39 @@ exit:
define void @load_store_factor3_i32(ptr %p) {
; CHECK-LABEL: @load_store_factor3_i32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP5:%.*]] = add <vscale x 4 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = mul <vscale x 4 x i64> [[TMP5]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP6]]
-; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = mul i64 1, [[TMP8]]
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
-; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 4 x i64> [[VEC_IND]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 3, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 4 x i64> [[TMP10]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP12:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP12]], <vscale x 4 x ptr> [[TMP11]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i64> [[TMP10]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP13]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER1:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP15:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER1]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 2, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP15]], <vscale x 4 x ptr> [[TMP14]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 4 x i64> [[TMP13]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[P]], <vscale x 4 x i64> [[TMP16]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer), <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP18:%.*]] = add <vscale x 4 x i32> [[WIDE_MASKED_GATHER2]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 3, i64 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> [[TMP17]], i32 4, <vscale x 4 x i1> shufflevector (<vscale x 4 x i1> insertelement (<vscale x 4 x i1> poison, i1 true, i64 0), <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer))
-; CHECK-NEXT: [[TMP19:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP20:%.*]] = mul i64 [[TMP19]], 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP20]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
-; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 3
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[TMP2]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <6 x i32>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 0, i32 3>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 1, i32 4>
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <6 x i32> [[WIDE_VEC]], <6 x i32> poison, <2 x i32> <i32 2, i32 5>
+; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> [[STRIDED_VEC]], <i32 1, i32 1>
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add <2 x i32> [[STRIDED_VEC1]], <i32 2, i32 2>
+; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP5]], 1
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[P]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = add <2 x i32> [[STRIDED_VEC2]], <i32 3, i32 3>
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[TMP8]], i32 -2
+; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> [[TMP12]], <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <6 x i32> [[TMP13]], <6 x i32> poison, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
+; CHECK-NEXT: store <6 x i32> [[INTERLEAVED_VEC]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ]
@@ -513,19 +498,30 @@ define void @combine_load_factor2_i64(ptr noalias %p, ptr noalias %q) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = shl i64 [[TMP0]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP1]]
-; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i64, ptr [[TMP2]], i32 0
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP3]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = shl i64 [[TMP0]], 1
+; CHECK-NEXT: [[TMP3:%.*]] = shl i64 [[TMP1]], 1
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[P]], i64 [[TMP3]]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i64>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[WIDE_VEC1:%.*]] = load <8 x i64>, ptr [[TMP7]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC1]]
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i64, ptr [[TMP5]], i32 0
-; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[TMP6]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <8 x i64> [[WIDE_VEC]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <8 x i64> [[WIDE_VEC1]], <8 x i64> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[TMP8:%.*]] = add <4 x i64> [[STRIDED_VEC]], [[STRIDED_VEC3]]
+; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i64> [[STRIDED_VEC2]], [[STRIDED_VEC4]]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i64, ptr [[Q:%.*]], i64 [[TMP0]]
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i64, ptr [[Q]], i64 [[TMP1]]
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[TMP10]], i32 0
+; CHECK-NEXT: store <4 x i64> [[TMP8]], ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i64, ptr [[TMP10]], i32 4
+; CHECK-NEXT: store <4 x i64> [[TMP9]], ptr [[TMP13]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
index 327d622ce4301..3de06ae1dbc0d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/interleaved-cost.ll
@@ -9,25 +9,25 @@ define void @i8_factor_2(ptr %data, i64 %n) {
entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i8_factor_2'
-; VF_2: Found an estimated cost of 3 for VF 2 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_2: Found an estimated cost of 0 for VF 2 For instruction: store i8 %a0, ptr %p0, align 1
-; VF_2-NEXT: Found an estimated cost of 3 for VF 2 For instruction: store i8 %a1, ptr %p1, align 1
+; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i8 %a1, ptr %p1, align 1
; VF_4-LABEL: Checking a loop in 'i8_factor_2'
-; VF_4: Found an estimated cost of 3 for VF 4 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a0, ptr %p0, align 1
-; VF_4-NEXT: Found an estimated cost of 3 for VF 4 For instruction: store i8 %a1, ptr %p1, align 1
+; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i8 %a1, ptr %p1, align 1
; VF_8-LABEL: Checking a loop in 'i8_factor_2'
-; VF_8: Found an estimated cost of 3 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a0, ptr %p0, align 1
-; VF_8-NEXT: Found an estimated cost of 3 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1
+; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1
; VF_16-LABEL: Checking a loop in 'i8_factor_2'
-; VF_16: Found an estimated cost of 3 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_16: Found an estimated cost of 0 for VF 16 For instruction: store i8 %a0, ptr %p0, align 1
-; VF_16-NEXT: Found an estimated cost of 5 for VF 16 For instruction: store i8 %a1, ptr %p1, align 1
+; VF_16-NEXT: Found an estimated cost of 2 for VF 16 For instruction: store i8 %a1, ptr %p1, align 1
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%p0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
@@ -51,26 +51,26 @@ define void @i8_factor_3(ptr %data, i64 %n) {
entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i8_factor_3'
-; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %l2 = load i8, ptr %p2, align 1
; VF_2: Found an estimated cost of 0 for VF 2 For instruction: store i8 %a0, ptr %p0, align 1
; VF_2: Found an estimated cost of 0 for VF 2 For instruction: store i8 %a1, ptr %p1, align 1
-; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store i8 %a2, ptr %p2, align 1
+; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i8 %a2, ptr %p2, align 1
; VF_4-LABEL: Checking a loop in 'i8_factor_3'
-; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %l2 = load i8, ptr %p2, align 1
; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a0, ptr %p0, align 1
; VF_4: Found an estimated cost of 0 for VF 4 For instruction: store i8 %a1, ptr %p1, align 1
-; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store i8 %a2, ptr %p2, align 1
+; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i8 %a2, ptr %p2, align 1
; VF_8-LABEL: Checking a loop in 'i8_factor_3'
-; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1
+; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l1 = load i8, ptr %p1, align 1
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %l2 = load i8, ptr %p2, align 1
; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a0, ptr %p0, align 1
; VF_8: Found an estimated cost of 0 for VF 8 For instruction: store i8 %a1, ptr %p1, align 1
-; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store i8 %a2, ptr %p2, align 1
+; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 %a2, ptr %p2, align 1
; VF_16-LABEL: Checking a loop in 'i8_factor_3'
; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %l0 = load i8, ptr %p0, align 1
; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %l1 = load i8, ptr %p1, align 1
More information about the llvm-commits
mailing list