[llvm] [VectorCombine] Fold vector.interleave2 with two constant splats (PR #125144)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 10:28:33 PST 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/125144
>From 5130f96b819806fce0b7135000ac3b6084b0106c Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Thu, 30 Jan 2025 16:27:54 -0800
Subject: [PATCH 1/3] [VectorCombine] Fold vector.interleave2 with two constant
splats
If we interleave two constant splats with vector.interleave2, we can
instead build a single wider splat whose elements pack both constants
((Hi << EltWidth) | Lo) and bitcast it back to the original result
type, provided TTI says the bitcast is cheaper than the interleave.
---
.../Transforms/Vectorize/VectorCombine.cpp | 41 +++++++++++++++++++
.../RISCV/vector-interleave2-splat.ll | 14 +++++++
2 files changed, 55 insertions(+)
create mode 100644 llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 59920b5a4dd20ab..fd49620b5e3ac3c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -125,6 +125,7 @@ class VectorCombine {
bool foldShuffleFromReductions(Instruction &I);
bool foldCastFromReductions(Instruction &I);
bool foldSelectShuffle(Instruction &I, bool FromReduction = false);
+ bool foldInterleaveIntrinsics(Instruction &I);
bool shrinkType(Instruction &I);
void replaceValue(Value &Old, Value &New) {
@@ -3145,6 +3146,45 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
return true;
}
+bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
+ // If we're interleaving 2 constant splats, for instance `<vscale x 8 x i32>
+ // <splat of 666>` and `<vscale x 8 x i32> <splat of 777>`, we can create a
+ // larger splat
+ // `<vscale x 8 x i64> <splat of ((777 << 32) | 666)>` first before casting it
+ // back into `<vscale x 16 x i32>`.
+ using namespace PatternMatch;
+ const APInt *SplatVal0, *SplatVal1;
+ if (!match(&I, m_Intrinsic<Intrinsic::vector_interleave2>(
+ m_APInt(SplatVal0), m_APInt(SplatVal1))))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "VC: Folding interleave2 with two splats: " << I
+ << "\n");
+
+ auto *VTy =
+ cast<VectorType>(cast<IntrinsicInst>(I).getArgOperand(0)->getType());
+ auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
+ unsigned Width = VTy->getElementType()->getIntegerBitWidth();
+
+ if (TTI.getInstructionCost(&I, CostKind) <
+ TTI.getCastInstrCost(Instruction::BitCast, I.getType(), ExtVTy,
+ TTI::CastContextHint::None, CostKind)) {
+ LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
+ << *I.getType() << " is too high.\n");
+ return false;
+ }
+
+ APInt NewSplatVal = SplatVal1->zext(Width * 2);
+ NewSplatVal <<= Width;
+ NewSplatVal |= SplatVal0->zext(Width * 2);
+ auto *NewSplat = ConstantVector::getSplat(
+ ExtVTy->getElementCount(), ConstantInt::get(F.getContext(), NewSplatVal));
+
+ IRBuilder<> Builder(&I);
+ replaceValue(I, *Builder.CreateBitCast(NewSplat, I.getType()));
+ return true;
+}
+
/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
bool VectorCombine::run() {
@@ -3189,6 +3229,7 @@ bool VectorCombine::run() {
MadeChange |= scalarizeBinopOrCmp(I);
MadeChange |= scalarizeLoadExtract(I);
MadeChange |= scalarizeVPIntrinsic(I);
+ MadeChange |= foldInterleaveIntrinsics(I);
}
if (Opcode == Instruction::Store)
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll b/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll
new file mode 100644
index 000000000000000..f2eb4e4e2dbc855
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll
@@ -0,0 +1,14 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=riscv64 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
+; RUN: opt -S -mtriple=riscv32 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
+
+define void @store_factor2_const_splat(ptr %dst) {
+; CHECK-LABEL: define void @store_factor2_const_splat(
+; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> bitcast (<vscale x 8 x i64> splat (i64 3337189589658) to <vscale x 16 x i32>), ptr [[DST]], <vscale x 16 x i1> splat (i1 true), i32 88)
+; CHECK-NEXT: ret void
+;
+ %interleave2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> splat (i32 666), <vscale x 8 x i32> splat (i32 777))
+ call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %interleave2, ptr %dst, <vscale x 16 x i1> splat (i1 true), i32 88)
+ ret void
+}
>From 0904d1572688b893a823388b073adb27fe969ecd Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 31 Jan 2025 10:16:11 -0800
Subject: [PATCH 2/3] fixup! Add more test cases
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 6 +++++-
.../RISCV/vector-interleave2-splat-e64.ll | 15 +++++++++++++++
.../RISCV/vector-interleave2-splat.ll | 11 +++++++++--
3 files changed, 29 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat-e64.ll
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index fd49620b5e3ac3c..91123a9d54e236f 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3166,7 +3166,11 @@ bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
auto *ExtVTy = VectorType::getExtendedElementVectorType(VTy);
unsigned Width = VTy->getElementType()->getIntegerBitWidth();
- if (TTI.getInstructionCost(&I, CostKind) <
+ // Just in case the costs of the interleave2 intrinsic and the bitcast are
+ // both invalid, in which case we want to bail out, we use <= rather
+ // than < here. Even if they both have valid and equal costs, it's probably
+ // not a good idea to emit a high-cost constant splat.
+ if (TTI.getInstructionCost(&I, CostKind) <=
TTI.getCastInstrCost(Instruction::BitCast, I.getType(), ExtVTy,
TTI::CastContextHint::None, CostKind)) {
LLVM_DEBUG(dbgs() << "VC: The cost to cast from " << *ExtVTy << " to "
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat-e64.ll b/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat-e64.ll
new file mode 100644
index 000000000000000..ede94a221b7b084
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat-e64.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -mtriple=riscv64 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
+; RUN: opt -S -mtriple=riscv32 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
+
+define void @interleave2_const_splat_nxv8i64(ptr %dst) {
+; CHECK-LABEL: define void @interleave2_const_splat_nxv8i64(
+; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[INTERLEAVE2:%.*]] = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> splat (i64 666), <vscale x 4 x i64> splat (i64 777))
+; CHECK-NEXT: call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> [[INTERLEAVE2]], ptr [[DST]], <vscale x 8 x i1> splat (i1 true), i32 88)
+; CHECK-NEXT: ret void
+;
+ %interleave2 = call <vscale x 8 x i64> @llvm.vector.interleave2.nxv8i64(<vscale x 4 x i64> splat (i64 666), <vscale x 4 x i64> splat (i64 777))
+ call void @llvm.vp.store.nxv8i64.p0(<vscale x 8 x i64> %interleave2, ptr %dst, <vscale x 8 x i1> splat (i1 true), i32 88)
+ ret void
+}
diff --git a/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll b/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll
index f2eb4e4e2dbc855..5c55ef0eb66cf44 100644
--- a/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll
+++ b/llvm/test/Transforms/VectorCombine/RISCV/vector-interleave2-splat.ll
@@ -1,12 +1,19 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -mtriple=riscv64 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
; RUN: opt -S -mtriple=riscv32 -mattr=+v,+m,+zvfh %s -passes=vector-combine | FileCheck %s
+; RUN: opt -S -mtriple=riscv64 -mattr=+m,+zve32x %s -passes=vector-combine | FileCheck %s --check-prefix=ZVE32X
-define void @store_factor2_const_splat(ptr %dst) {
-; CHECK-LABEL: define void @store_factor2_const_splat(
+define void @interleave2_const_splat_nxv16i32(ptr %dst) {
+; CHECK-LABEL: define void @interleave2_const_splat_nxv16i32(
; CHECK-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> bitcast (<vscale x 8 x i64> splat (i64 3337189589658) to <vscale x 16 x i32>), ptr [[DST]], <vscale x 16 x i1> splat (i1 true), i32 88)
; CHECK-NEXT: ret void
+;
+; ZVE32X-LABEL: define void @interleave2_const_splat_nxv16i32(
+; ZVE32X-SAME: ptr [[DST:%.*]]) #[[ATTR0:[0-9]+]] {
+; ZVE32X-NEXT: [[INTERLEAVE2:%.*]] = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> splat (i32 666), <vscale x 8 x i32> splat (i32 777))
+; ZVE32X-NEXT: call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> [[INTERLEAVE2]], ptr [[DST]], <vscale x 16 x i1> splat (i1 true), i32 88)
+; ZVE32X-NEXT: ret void
;
%interleave2 = call <vscale x 16 x i32> @llvm.vector.interleave2.nxv16i32(<vscale x 8 x i32> splat (i32 666), <vscale x 8 x i32> splat (i32 777))
call void @llvm.vp.store.nxv16i32.p0(<vscale x 16 x i32> %interleave2, ptr %dst, <vscale x 16 x i1> splat (i1 true), i32 88)
>From 8f129112841147abc844c8408d9283f46e53761c Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 31 Jan 2025 10:25:42 -0800
Subject: [PATCH 3/3] fixup! Address review comment
---
llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 91123a9d54e236f..fd6b5303a257030 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -3146,13 +3146,11 @@ bool VectorCombine::foldInsExtVectorToShuffle(Instruction &I) {
return true;
}
+/// If we're interleaving 2 constant splats, for instance `<vscale x 8 x i32>
+/// <splat of 666>` and `<vscale x 8 x i32> <splat of 777>`, we can create a
+/// larger splat `<vscale x 8 x i64> <splat of ((777 << 32) | 666)>` first
+/// before casting it back into `<vscale x 16 x i32>`.
bool VectorCombine::foldInterleaveIntrinsics(Instruction &I) {
- // If we're interleaving 2 constant splats, for instance `<vscale x 8 x i32>
- // <splat of 666>` and `<vscale x 8 x i32> <splat of 777>`, we can create a
- // larger splat
- // `<vscale x 8 x i64> <splat of ((777 << 32) | 666)>` first before casting it
- // back into `<vscale x 16 x i32>`.
- using namespace PatternMatch;
const APInt *SplatVal0, *SplatVal1;
if (!match(&I, m_Intrinsic<Intrinsic::vector_interleave2>(
m_APInt(SplatVal0), m_APInt(SplatVal1))))
More information about the llvm-commits
mailing list