[llvm] 8ac6b41 - [X86] Ensure VPERMV3 -> VPERMV fold comes from a double width vector

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 8 04:04:46 PDT 2024


Author: Simon Pilgrim
Date: 2024-07-08T12:04:11+01:00
New Revision: 8ac6b415e4e5e631410d9cf6a10f15668f663441

URL: https://github.com/llvm/llvm-project/commit/8ac6b415e4e5e631410d9cf6a10f15668f663441
DIFF: https://github.com/llvm/llvm-project/commit/8ac6b415e4e5e631410d9cf6a10f15668f663441.diff

LOG: [X86] Ensure VPERMV3 -> VPERMV fold comes from a double width vector

#96414 and #97206 didn't ensure that the two subvectors were extracted from a source vector exactly double the width of the destination.

We can relax this in a future patch, but fix the #97968 crash first.

Fixes #97968

Added: 
    llvm/test/CodeGen/X86/pr97968.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e03edf92cc4780..14d287d2d5e90b 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41336,6 +41336,7 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
   case X86ISD::VPERMV3: {
     // Combine VPERMV3 to widened VPERMV if the two source operands are split
     // from the same vector.
+    // TODO: Handle extraction from a wider source vector (e.g. v16i32 -> v4i32).
     SDValue V1 = peekThroughBitcasts(N.getOperand(0));
     SDValue V2 = peekThroughBitcasts(N.getOperand(2));
     MVT SVT = V1.getSimpleValueType();
@@ -41346,7 +41347,8 @@ static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
         V1.getConstantOperandVal(1) == 0 &&
         V2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
         V2.getConstantOperandVal(1) == SVT.getVectorNumElements() &&
-        V1.getOperand(0) == V2.getOperand(0)) {
+        V1.getOperand(0) == V2.getOperand(0) &&
+        V1.getOperand(0).getValueSizeInBits() == NVT.getSizeInBits()) {
       SDValue Mask =
           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NVT, DAG.getUNDEF(NVT),
                       N.getOperand(1), DAG.getIntPtrConstant(0, DL));

diff  --git a/llvm/test/CodeGen/X86/pr97968.ll b/llvm/test/CodeGen/X86/pr97968.ll
new file mode 100644
index 00000000000000..103c60bfdfc7f2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr97968.ll
@@ -0,0 +1,20 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
+
+define <2 x i32> @PR97968(<16 x i32> %a0) {
+; CHECK-LABEL: PR97968:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,7,2,7]
+; CHECK-NEXT:    vextracti128 $1, %ymm0, %xmm2
+; CHECK-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1
+; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %sub0 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %sub1 = shufflevector <16 x i32> %a0, <16 x i32> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
+  %elt0 = extractelement <4 x i32> %sub0, i64 2
+  %elt7 = extractelement <4 x i32> %sub1, i64 3
+  %scl0 = insertelement <2 x i32> undef, i32 %elt0, i32 0
+  %scl1 = insertelement <2 x i32> %scl0, i32 %elt7, i32 1
+  ret <2 x i32> %scl1
+}


        


More information about the llvm-commits mailing list