[llvm] [SLPVectorizer][X86] Free load cost for stores with constant pointers (PR #118016)
Antonio Frighetto via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 28 07:35:16 PST 2024
https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/118016
When estimating the cost of a store of a constant buildvector, do not include the cost of the additional load that materializes the vector from the constant pool if the store's pointer operand is itself a constant. In such cases the load is avoided in the first place: the only operations required are computing the address of the constant (`rip+base_addr+offset`) and the store itself.
Fixes regression: https://github.com/llvm/llvm-project/issues/111126.
>From a6c9bf06b6e9b12e7dd3ee536f51353fce0a4e99 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Thu, 28 Nov 2024 15:31:36 +0100
Subject: [PATCH] [SLPVectorizer][X86] Free load cost for stores with constant
pointers
When estimating the cost for stores of constant buildvectors, do not
take into account the cost of the additional load to materialize a
vector from a constant pool when dealing with a constant pointer.
In such cases, the load is avoided in the first place, as the only
operations required simply involve computing the address of the
constant (`rip+base_addr+offset`) and the store itself.
Fixes regression: https://github.com/llvm/llvm-project/issues/111126.
---
.../lib/Target/X86/X86TargetTransformInfo.cpp | 5 +++--
.../Transforms/LoopUnroll/unroll-cleanup.ll | 12 +++--------
.../X86/buildvector_store_constant.ll | 20 +++++++++++++++++++
3 files changed, 26 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 179e29e40614e7..689355a168c2a1 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5157,8 +5157,9 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
InstructionCost Cost = 0;
- // Add a cost for constant load to vector.
- if (Opcode == Instruction::Store && OpInfo.isConstant())
+ // Add a cost for constant load to vector, if pointer is not a constant.
+ if (auto *SI = dyn_cast_or_null<StoreInst>(I);
+ SI && !isa<Constant>(SI->getPointerOperand()) && OpInfo.isConstant())
Cost += getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src),
/*AddressSpace=*/0, CostKind);
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
index da1808fc278c09..75829cb71814f6 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
@@ -57,9 +57,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
; CHECK-NEXT: [[CMP_PROL:%.*]] = icmp eq i8 [[TMP3_PROL]], 0
; CHECK-NEXT: br i1 [[CMP_PROL]], label %[[IF_THEN_PROL:.*]], label %[[FOR_INC_PROL:.*]]
; CHECK: [[IF_THEN_PROL]]:
-; CHECK-NEXT: [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 2
-; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX_PROL]], align 2
-; CHECK-NEXT: store i16 0, ptr [[R_022]], align 2
+; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[R_022]], align 2
; CHECK-NEXT: [[ARRAYIDX5_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 4
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5_PROL]], align 2
; CHECK-NEXT: br label %[[FOR_INC_PROL]]
@@ -82,9 +80,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP3]], 0
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 2
-; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX]], align 2
-; CHECK-NEXT: store i16 0, ptr [[R_117]], align 2
+; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[R_117]], align 2
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 4
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5]], align 2
; CHECK-NEXT: br label %[[FOR_INC]]
@@ -96,9 +92,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
; CHECK-NEXT: br i1 [[CMP_1]], label %[[IF_THEN_1:.*]], label %[[FOR_INC_1]]
; CHECK: [[IF_THEN_1]]:
; CHECK-NEXT: [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 6
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 8
-; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX_1]], align 2
-; CHECK-NEXT: store i16 0, ptr [[ADD_PTR]], align 2
+; CHECK-NEXT: store <2 x i16> zeroinitializer, ptr [[ADD_PTR]], align 2
; CHECK-NEXT: [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 10
; CHECK-NEXT: store i16 0, ptr [[ARRAYIDX5_1]], align 2
; CHECK-NEXT: br label %[[FOR_INC_1]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll
new file mode 100644
index 00000000000000..1d9e4d20d20af6
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu --passes=slp-vectorizer -S -o - %s | FileCheck %s
+
+@arr = global [20 x i64] zeroinitializer, align 16
+
+define void @store_from_constant_ptr() {
+; CHECK-LABEL: define void @store_from_constant_ptr() {
+; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr @arr, align 16
+; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16
+; CHECK-NEXT: store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16
+; CHECK-NEXT: ret void
+;
+ store i64 1, ptr @arr, align 16
+ store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 8), align 8
+ store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16
+ store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 24), align 8
+ store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16
+ store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 40), align 8
+ ret void
+}
More information about the llvm-commits
mailing list