[llvm] [DAG] replaceShuffleOfInsert - add support for shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c) (PR #127210)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 14 05:40:56 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/127210
Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector.
I've limited this to targets that have Custom/Legal ISD::INSERT_VECTOR_ELT handling for now - although we can probably always fold this before LegalOperations.
>From 3c657838ee76b92695694c39544c142dbe36a67b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 14 Feb 2025 13:39:43 +0000
Subject: [PATCH] [DAG] replaceShuffleOfInsert - add support for
shuffle_vector(scalar_to_vector(x),y) -> insert_vector_elt(y,x,c)
Begin extending replaceShuffleOfInsert to handle other forms of scalar insertion into a vector.
I've limited this to targets that just have Custom/Legal ISD::INSERT_VECTOR_ELT handling for now - although we can probably always fold this before LegalOperations.
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 18 +++++++---
.../PowerPC/v4i32_scalar_to_vector_shuffle.ll | 34 ++++++-------------
2 files changed, 25 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c6fd72b6b76f4..82c4cbf793ee7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -626,6 +626,7 @@ namespace {
SDValue CombineZExtLogicopShiftLoad(SDNode *N);
SDValue combineRepeatedFPDivisors(SDNode *N);
SDValue combineFMulOrFDivWithIntPow2(SDNode *N);
+ SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf);
SDValue mergeInsertEltWithShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
SDValue combineInsertEltToLoad(SDNode *N, unsigned InsIndex);
@@ -26102,8 +26103,7 @@ static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
/// If a shuffle inserts exactly one element from a source vector operand into
/// another vector operand and we can access the specified element as a scalar,
/// then we can eliminate the shuffle.
-static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
- SelectionDAG &DAG) {
+SDValue DAGCombiner::replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf) {
// First, check if we are taking one element of a vector and shuffling that
// element into another vector.
ArrayRef<int> Mask = Shuf->getMask();
@@ -26126,7 +26126,7 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
// Now see if we can access that element as a scalar via a real insert element
// instruction.
// TODO: We can try harder to locate the element as a scalar. Examples: it
- // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
+ // could be an operand of BUILD_VECTOR, or a constant.
assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
"Shuffle mask value must be from operand 0");
@@ -26149,6 +26149,16 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
Op1, Elt, NewInsIndex);
}
+ if (!hasOperation(ISD::INSERT_VECTOR_ELT, Op0.getValueType()))
+ return SDValue();
+
+ if (sd_match(Op0, m_UnaryOp(ISD::SCALAR_TO_VECTOR, m_Value(Elt))) &&
+ Mask[ShufOp0Index] == 0) {
+ SDValue NewInsIndex = DAG.getVectorIdxConstant(ShufOp0Index, SDLoc(Shuf));
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
+ Op1, Elt, NewInsIndex);
+ }
+
return SDValue();
}
@@ -26220,7 +26230,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
}
- if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
+ if (SDValue InsElt = replaceShuffleOfInsert(SVN))
return InsElt;
// A shuffle of a single vector that is a splatted value can always be folded.
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
index 402a4f34e62b2..d98b78dfdd3b0 100644
--- a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -239,13 +239,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-LE-P9-LABEL: test_none_v4i32:
; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: li r3, 0
-; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2
-; CHECK-LE-P9-NEXT: mtfprwz f0, r3
; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-LE-P9-NEXT: xxperm v2, vs0, vs1
+; CHECK-LE-P9-NEXT: lxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: xxperm v2, v2, vs0
; CHECK-LE-P9-NEXT: stxv v2, 0(r5)
; CHECK-LE-P9-NEXT: blr
;
@@ -263,14 +260,11 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-BE-P9-LABEL: test_none_v4i32:
; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: li r3, 0
-; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2
-; CHECK-BE-P9-NEXT: mtfprwz f0, r3
; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
-; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
-; CHECK-BE-P9-NEXT: xxperm vs0, v2, vs1
-; CHECK-BE-P9-NEXT: stxv vs0, 0(r5)
+; CHECK-BE-P9-NEXT: lxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: xxperm v2, v2, vs0
+; CHECK-BE-P9-NEXT: stxv v2, 0(r5)
; CHECK-BE-P9-NEXT: blr
;
; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
@@ -286,13 +280,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: li r4, 0
-; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
-; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-AIX-64-P9-NEXT: xxperm vs0, v2, vs1
-; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-AIX-64-P9-NEXT: xxperm v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-64-P9-NEXT: blr
;
; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
@@ -308,13 +299,10 @@ define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) {
;
; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
-; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lfiwzx f0, 0, r4
; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r4)
-; CHECK-AIX-32-P9-NEXT: xxperm vs0, v2, vs1
-; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxperm v2, v2, vs0
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
; CHECK-AIX-32-P9-NEXT: blr
entry:
%0 = extractelement <2 x i32> %vec, i64 0
More information about the llvm-commits
mailing list