[llvm] [X86] canonicalizeShuffleWithOp - add handling for X86ISD::VPERMV nodes (PR #127625)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 04:17:26 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/127625
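Handle the different src/mask operand ordering: unlike the other unary shuffles handled here, X86ISD::VPERMV takes its shuffle mask as operand 0 and its source vector as operand 1.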
From b16eaf6c500043fdb2267a53c4cdf7ed1bfbcde3 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Tue, 18 Feb 2025 12:13:41 +0000
Subject: [PATCH] [X86] canonicalizeShuffleWithOp - add handling for
X86ISD::VPERMV nodes
Handle different src/mask operand ordering
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 27 ++++++++++++-------
llvm/test/CodeGen/X86/vector-partial-undef.ll | 2 +-
.../CodeGen/X86/vector-shuffle-combining.ll | 2 +-
3 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 386d56dcda9de..696bb14292dd0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41629,23 +41629,28 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
+ case X86ISD::VPERMV:
case X86ISD::VPERMI:
case X86ISD::VPERMILPI: {
- if (N.getOperand(0).getValueType() == ShuffleVT &&
- N->isOnlyUserOf(N.getOperand(0).getNode())) {
- SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ unsigned SrcIdx = Opc == X86ISD::VPERMV ? 1 : 0;
+ if (N.getOperand(SrcIdx).getValueType() == ShuffleVT &&
+ N->isOnlyUserOf(N.getOperand(SrcIdx).getNode())) {
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(SrcIdx));
unsigned SrcOpcode = N0.getOpcode();
EVT OpVT = N0.getValueType();
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
- bool FoldShuf = Opc != X86ISD::VPERMI;
+ bool FoldShuf = Opc != X86ISD::VPERMI && Opc != X86ISD::VPERMV;
if (IsMergeableWithShuffle(Op00, FoldShuf) ||
IsMergeableWithShuffle(Op01, FoldShuf)) {
SDValue LHS, RHS;
Op00 = DAG.getBitcast(ShuffleVT, Op00);
Op01 = DAG.getBitcast(ShuffleVT, Op01);
- if (N.getNumOperands() == 2) {
+ if (Opc == X86ISD::VPERMV) {
+ LHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op00);
+ RHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op01);
+ } else if (N.getNumOperands() == 2) {
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
} else {
@@ -41661,11 +41666,13 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
OpVT.getScalarSizeInBits() ==
N0.getOperand(0).getScalarValueSizeInBits()) {
- SDValue Op00 = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
- SDValue Res =
- N.getNumOperands() == 2
- ? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
- : DAG.getNode(Opc, DL, ShuffleVT, Op00);
+ SDValue Res = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
+ if (Opc == X86ISD::VPERMV)
+ Res = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Res);
+ else if (N.getNumOperands() == 2)
+ Res = DAG.getNode(Opc, DL, ShuffleVT, Res, N.getOperand(1));
+ else
+ Res = DAG.getNode(Opc, DL, ShuffleVT, Res);
Res = DAG.getBitcast(N0.getOperand(0).getValueType(), Res);
return DAG.getBitcast(ShuffleVT, DAG.getNode(SrcOpcode, DL, OpVT, Res));
}
diff --git a/llvm/test/CodeGen/X86/vector-partial-undef.ll b/llvm/test/CodeGen/X86/vector-partial-undef.ll
index fd41fd53e3be1..4753dba2d468f 100644
--- a/llvm/test/CodeGen/X86/vector-partial-undef.ll
+++ b/llvm/test/CodeGen/X86/vector-partial-undef.ll
@@ -151,9 +151,9 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 07c770abc65d6..05b0a7c10b4e1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2469,10 +2469,10 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
;
; AVX2-FAST-ALL-LABEL: combine_unneeded_subvector1:
; AVX2-FAST-ALL: # %bb.0:
-; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: combine_unneeded_subvector1:
More information about the llvm-commits
mailing list