[llvm] [X86] canonicalizeShuffleWithOp - add handling for X86ISD::VPERMV nodes (PR #127625)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 04:18:08 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
<details>
<summary>Changes</summary>
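Handle the different src/mask operand ordering: X86ISD::VPERMV takes the shuffle mask as operand 0 and the source vector as operand 1, whereas the other unary shuffles handled in this case take the source as operand 0.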
---
Full diff: https://github.com/llvm/llvm-project/pull/127625.diff
3 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+17-10)
- (modified) llvm/test/CodeGen/X86/vector-partial-undef.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/vector-shuffle-combining.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 386d56dcda9de..696bb14292dd0 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41629,23 +41629,28 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
case X86ISD::PSHUFD:
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
+ case X86ISD::VPERMV:
case X86ISD::VPERMI:
case X86ISD::VPERMILPI: {
- if (N.getOperand(0).getValueType() == ShuffleVT &&
- N->isOnlyUserOf(N.getOperand(0).getNode())) {
- SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
+ unsigned SrcIdx = Opc == X86ISD::VPERMV ? 1 : 0;
+ if (N.getOperand(SrcIdx).getValueType() == ShuffleVT &&
+ N->isOnlyUserOf(N.getOperand(SrcIdx).getNode())) {
+ SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(SrcIdx));
unsigned SrcOpcode = N0.getOpcode();
EVT OpVT = N0.getValueType();
if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
- bool FoldShuf = Opc != X86ISD::VPERMI;
+ bool FoldShuf = Opc != X86ISD::VPERMI && Opc != X86ISD::VPERMV;
if (IsMergeableWithShuffle(Op00, FoldShuf) ||
IsMergeableWithShuffle(Op01, FoldShuf)) {
SDValue LHS, RHS;
Op00 = DAG.getBitcast(ShuffleVT, Op00);
Op01 = DAG.getBitcast(ShuffleVT, Op01);
- if (N.getNumOperands() == 2) {
+ if (Opc == X86ISD::VPERMV) {
+ LHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op00);
+ RHS = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Op01);
+ } else if (N.getNumOperands() == 2) {
LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
} else {
@@ -41661,11 +41666,13 @@ static SDValue canonicalizeShuffleWithOp(SDValue N, SelectionDAG &DAG,
if (SrcOpcode == ISD::SINT_TO_FP && IsSafeToMoveShuffle(N0, SrcOpcode) &&
OpVT.getScalarSizeInBits() ==
N0.getOperand(0).getScalarValueSizeInBits()) {
- SDValue Op00 = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
- SDValue Res =
- N.getNumOperands() == 2
- ? DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1))
- : DAG.getNode(Opc, DL, ShuffleVT, Op00);
+ SDValue Res = DAG.getBitcast(ShuffleVT, N0.getOperand(0));
+ if (Opc == X86ISD::VPERMV)
+ Res = DAG.getNode(Opc, DL, ShuffleVT, N.getOperand(0), Res);
+ else if (N.getNumOperands() == 2)
+ Res = DAG.getNode(Opc, DL, ShuffleVT, Res, N.getOperand(1));
+ else
+ Res = DAG.getNode(Opc, DL, ShuffleVT, Res);
Res = DAG.getBitcast(N0.getOperand(0).getValueType(), Res);
return DAG.getBitcast(ShuffleVT, DAG.getNode(SrcOpcode, DL, OpVT, Res));
}
diff --git a/llvm/test/CodeGen/X86/vector-partial-undef.ll b/llvm/test/CodeGen/X86/vector-partial-undef.ll
index fd41fd53e3be1..4753dba2d468f 100644
--- a/llvm/test/CodeGen/X86/vector-partial-undef.ll
+++ b/llvm/test/CodeGen/X86/vector-partial-undef.ll
@@ -151,9 +151,9 @@ define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
; AVX: # %bb.0:
; AVX-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
; AVX-NEXT: vpermps %ymm0, %ymm1, %ymm0
+; AVX-NEXT: vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
%extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
%bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 07c770abc65d6..05b0a7c10b4e1 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2469,10 +2469,10 @@ define <8 x i32> @combine_unneeded_subvector1(<8 x i32> %a) {
;
; AVX2-FAST-ALL-LABEL: combine_unneeded_subvector1:
; AVX2-FAST-ALL: # %bb.0:
-; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: vbroadcasti128 {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
; AVX2-FAST-ALL-NEXT: # ymm1 = mem[0,1,0,1]
; AVX2-FAST-ALL-NEXT: vpermd %ymm0, %ymm1, %ymm0
+; AVX2-FAST-ALL-NEXT: vpaddd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX2-FAST-ALL-NEXT: retq
;
; AVX2-FAST-PERLANE-LABEL: combine_unneeded_subvector1:
``````````
</details>
https://github.com/llvm/llvm-project/pull/127625
More information about the llvm-commits mailing list