[llvm] 9b72e0f - [X86] combineConcatVectorOps - fold concat(permilpd(x),permilpd(y)) -> permilpd(concat(x,y))

Fri Jan 14 07:51:49 PST 2022

Author: Simon Pilgrim
Date: 2022-01-14T15:48:57Z
New Revision: 9b72e0f9a2b92c6076262d5dfdaf33a89c7affa7

URL: https://github.com/llvm/llvm-project/commit/9b72e0f9a2b92c6076262d5dfdaf33a89c7affa7
DIFF: https://github.com/llvm/llvm-project/commit/9b72e0f9a2b92c6076262d5dfdaf33a89c7affa7.diff

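LOG: [X86] combineConcatVectorOps - fold concat(permilpd(x),permilpd(y)) -> permilpd(concat(x,y))

For a two-operand v4f64 concat, the low 2 bits of each sub-vector shuffle's immediate are merged into a single 4-bit immediate (operand 0 in bits [1:0], operand 1 in bits [3:2]), so one 256-bit vpermilpd on the concatenated source replaces the two 128-bit shuffles.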

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 352bf0f7abaa..a5061f5a9776 100644

--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -52465,7 +52465,6 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
       }
       LLVM_FALLTHROUGH;
     case X86ISD::VPERMILPI:
-      // TODO - add support for vXf64/vXi64 shuffles.
       if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
           Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
         SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
@@ -52473,6 +52472,14 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                           Op0.getOperand(1));
         return DAG.getBitcast(VT, Res);
       }
+      if (!IsSplat && NumOps == 2 && VT == MVT::v4f64) {
+        uint64_t Idx0 = Ops[0].getConstantOperandVal(1);
+        uint64_t Idx1 = Ops[1].getConstantOperandVal(1);
+        uint64_t Idx = ((Idx1 & 3) << 2) | (Idx0 & 3);
+        return DAG.getNode(Op0.getOpcode(), DL, VT,
+                           ConcatSubOperand(VT, Ops, 0),
+                           DAG.getTargetConstant(Idx, DL, MVT::i8));
+      }
       break;
     case X86ISD::VPERMV3:
       if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
index 3af2605f0de0..6bebb91b7afe 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll
@@ -742,9 +742,9 @@ define <4 x double> @shuffle_v4f64_0044_v2f64(<2 x double> %a, <2 x double> %b)
 define <4 x double> @shuffle_v4f64_1032_v2f64(<2 x double> %a, <2 x double> %b) {
 ; ALL-LABEL: shuffle_v4f64_1032_v2f64:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
-; ALL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
+; ALL-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
 ; ALL-NEXT:    retq
   %1 = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 0>
   %2 = shufflevector <2 x double> %b, <2 x double> undef, <2 x i32> <i32 1, i32 0>