[llvm] [SLPVectorizer][X86] Free load cost for stores with constant pointers (PR #118016)

Antonio Frighetto via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 28 07:35:16 PST 2024


https://github.com/antoniofrighetto created https://github.com/llvm/llvm-project/pull/118016

When estimating the cost for stores of constant buildvectors, do not take into account the cost of the additional load to materialize a vector from a constant pool when dealing with a constant pointer. In such cases, the load is avoided in the first place, as the only operations required simply involve computing the address of the constant (`rip+base_addr+offset`) and the store itself.

Fixes regression: https://github.com/llvm/llvm-project/issues/111126.

From a6c9bf06b6e9b12e7dd3ee536f51353fce0a4e99 Mon Sep 17 00:00:00 2001
From: Antonio Frighetto <me at antoniofrighetto.com>
Date: Thu, 28 Nov 2024 15:31:36 +0100
Subject: [PATCH] [SLPVectorizer][X86] Free load cost for stores with constant
 pointers

When estimating the cost for stores of constant buildvectors, do not
take into account the cost of the additional load to materialize a
vector from a constant pool when dealing with a constant pointer.
In such cases, the load is avoided in the first place, as the only
operations required simply involve computing the address of the
constant (`rip+base_addr+offset`) and the store itself.

Fixes regression: https://github.com/llvm/llvm-project/issues/111126.
---
 .../lib/Target/X86/X86TargetTransformInfo.cpp |  5 +++--
 .../Transforms/LoopUnroll/unroll-cleanup.ll   | 12 +++--------
 .../X86/buildvector_store_constant.ll         | 20 +++++++++++++++++++
 3 files changed, 26 insertions(+), 11 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll

diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 179e29e40614e7..689355a168c2a1 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5157,8 +5157,9 @@ InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
 
   InstructionCost Cost = 0;
 
-  // Add a cost for constant load to vector.
-  if (Opcode == Instruction::Store && OpInfo.isConstant())
+  // Add a cost for constant load to vector, if pointer is not a constant.
+  if (auto *SI = dyn_cast_or_null<StoreInst>(I);
+      SI && !isa<Constant>(SI->getPointerOperand()) && OpInfo.isConstant())
     Cost += getMemoryOpCost(Instruction::Load, Src, DL.getABITypeAlign(Src),
                             /*AddressSpace=*/0, CostKind);
 
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
index da1808fc278c09..75829cb71814f6 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-cleanup.ll
@@ -57,9 +57,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-NEXT:    [[CMP_PROL:%.*]] = icmp eq i8 [[TMP3_PROL]], 0
 ; CHECK-NEXT:    br i1 [[CMP_PROL]], label %[[IF_THEN_PROL:.*]], label %[[FOR_INC_PROL:.*]]
 ; CHECK:       [[IF_THEN_PROL]]:
-; CHECK-NEXT:    [[ARRAYIDX_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 2
-; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX_PROL]], align 2
-; CHECK-NEXT:    store i16 0, ptr [[R_022]], align 2
+; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[R_022]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX5_PROL:%.*]] = getelementptr inbounds i8, ptr [[R_022]], i64 4
 ; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX5_PROL]], align 2
 ; CHECK-NEXT:    br label %[[FOR_INC_PROL]]
@@ -82,9 +80,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i8 [[TMP3]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[FOR_INC:.*]]
 ; CHECK:       [[IF_THEN]]:
-; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 2
-; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX]], align 2
-; CHECK-NEXT:    store i16 0, ptr [[R_117]], align 2
+; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[R_117]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 4
 ; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX5]], align 2
 ; CHECK-NEXT:    br label %[[FOR_INC]]
@@ -96,9 +92,7 @@ define void @_Z3fn1v(ptr %r, ptr %a) #0 {
 ; CHECK-NEXT:    br i1 [[CMP_1]], label %[[IF_THEN_1:.*]], label %[[FOR_INC_1]]
 ; CHECK:       [[IF_THEN_1]]:
 ; CHECK-NEXT:    [[ADD_PTR:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 6
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 8
-; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX_1]], align 2
-; CHECK-NEXT:    store i16 0, ptr [[ADD_PTR]], align 2
+; CHECK-NEXT:    store <2 x i16> zeroinitializer, ptr [[ADD_PTR]], align 2
 ; CHECK-NEXT:    [[ARRAYIDX5_1:%.*]] = getelementptr inbounds i8, ptr [[R_117]], i64 10
 ; CHECK-NEXT:    store i16 0, ptr [[ARRAYIDX5_1]], align 2
 ; CHECK-NEXT:    br label %[[FOR_INC_1]]
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll
new file mode 100644
index 00000000000000..1d9e4d20d20af6
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector_store_constant.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu --passes=slp-vectorizer -S -o - %s | FileCheck %s
+
+@arr = global [20 x i64] zeroinitializer, align 16
+
+define void @store_from_constant_ptr() {
+; CHECK-LABEL: define void @store_from_constant_ptr() {
+; CHECK-NEXT:    store <2 x i64> splat (i64 1), ptr @arr, align 16
+; CHECK-NEXT:    store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16
+; CHECK-NEXT:    store <2 x i64> splat (i64 1), ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16
+; CHECK-NEXT:    ret void
+;
+  store i64 1, ptr @arr, align 16
+  store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 8), align 8
+  store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 16), align 16
+  store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 24), align 8
+  store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 32), align 16
+  store i64 1, ptr getelementptr inbounds (i8, ptr @arr, i64 40), align 8
+  ret void
+}



More information about the llvm-commits mailing list