[llvm] fd347ce - [X86] Add InstFixup for masked `unpck{l|h}pd` -> masked `shufpd`

Noah Goldstein via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 5 23:37:09 PDT 2023


Author: Noah Goldstein
Date: 2023-04-06T01:36:42-05:00
New Revision: fd347ceac490e28a1b1590e05ac6f9e570d4dc99

URL: https://github.com/llvm/llvm-project/commit/fd347ceac490e28a1b1590e05ac6f9e570d4dc99
DIFF: https://github.com/llvm/llvm-project/commit/fd347ceac490e28a1b1590e05ac6f9e570d4dc99.diff

LOG: [X86] Add InstFixup for masked `unpck{l|h}pd` -> masked `shufpd`

This is a follow-up to D147507, which removed the prior transformation
of masked `unpck{l|h}pd` to masked `shufps`. That transform was
incorrect because the mask applies to 64-bit double elements, not
32-bit float elements. Using `shufpd` for the replacement instead
preserves the mask semantics while keeping the same benefits as
`shufps`. A short sketch of the immediate equivalence follows the
revision link below.

Reviewed By: pengfei, RKSimon

Differential Revision: https://reviews.llvm.org/D147541
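
Why 0x00 and 0xff are the right immediates can be checked with a small
standalone C++ model (a sketch for illustration only, not LLVM code;
the helper names below are invented): it mimics the per-lane element
selection of VSHUFPD and VUNPCK{L|H}PD on four doubles and confirms
that immediate 0x00 reproduces the low unpack while 0xff reproduces
the high unpack. Since VSHUFPD uses one immediate bit per destination
element and ignores the unused high bits at narrower widths, the same
two immediates work for the 128-, 256-, and 512-bit forms.

  // Standalone model of VSHUFPD vs. VUNPCK{L|H}PD element selection
  // on a 256-bit vector of 4 doubles (illustrative sketch, not LLVM).
  #include <array>
  #include <cassert>
  #include <cstdio>

  using V4 = std::array<double, 4>;

  // VSHUFPD dst, a, b, imm: one immediate bit per destination element.
  // Even destination elements come from `a`, odd ones from `b`; the
  // bit picks the low (0) or high (1) double of that 128-bit lane.
  static V4 shufpd(const V4 &a, const V4 &b, unsigned imm) {
    V4 dst;
    for (int i = 0; i < 4; ++i) {
      const V4 &src = (i & 1) ? b : a;
      int lane = (i / 2) * 2; // base element index of the 128-bit lane
      dst[i] = src[lane + ((imm >> i) & 1)];
    }
    return dst;
  }

  // VUNPCKLPD: interleave the low double of each 128-bit lane of a/b.
  static V4 unpcklpd(const V4 &a, const V4 &b) {
    return {a[0], b[0], a[2], b[2]};
  }

  // VUNPCKHPD: interleave the high double of each 128-bit lane of a/b.
  static V4 unpckhpd(const V4 &a, const V4 &b) {
    return {a[1], b[1], a[3], b[3]};
  }

  int main() {
    V4 a = {0.0, 1.0, 2.0, 3.0};
    V4 b = {4.0, 5.0, 6.0, 7.0};
    assert(shufpd(a, b, 0x00) == unpcklpd(a, b)); // all bits clear -> low
    assert(shufpd(a, b, 0xff) == unpckhpd(a, b)); // all bits set -> high
    std::puts("shufpd 0x00/0xff matches unpck{l|h}pd");
  }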

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86FixupInstTuning.cpp
    llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
    llvm/test/CodeGen/X86/tuning-shuffle-unpckpd.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FixupInstTuning.cpp b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
index 13ed52e385884..5da5b91653be4 100644
--- a/llvm/lib/Target/X86/X86FixupInstTuning.cpp
+++ b/llvm/lib/Target/X86/X86FixupInstTuning.cpp
@@ -156,6 +156,8 @@ bool X86FixupInstTuningPass::processInstruction(
 
   // `vunpcklpd/vmovlhps r, r` -> `vshufps r, r, 0x44`
   // `vunpckhpd/vmovlhps r, r` -> `vshufps r, r, 0xee`
+  // `vunpcklpd r, r, k` -> `vshufpd r, r, 0x00`
+  // `vunpckhpd r, r, k` -> `vshufpd r, r, 0xff`
   // iff `vshufps` is faster than `vunpck{l|h}pd`. Otherwise stick with
   // `vunpck{l|h}pd` as it uses less code size.
   // TODO: Look into using `{VP}UNPCK{L|H}QDQ{...}` instead of `{V}SHUF{...}PS`
@@ -168,11 +170,12 @@ bool X86FixupInstTuningPass::processInstruction(
     MI.addOperand(MachineOperand::CreateImm(MaskImm));
     return true;
   };
+
   auto ProcessUNPCKLPDrr = [&](unsigned NewOpc) -> bool {
-    return ProcessUNPCKPD(NewOpc, 0x44);
+    return ProcessUNPCKPD(NewOpc, 0x00);
   };
   auto ProcessUNPCKHPDrr = [&](unsigned NewOpc) -> bool {
-    return ProcessUNPCKPD(NewOpc, 0xee);
+    return ProcessUNPCKPD(NewOpc, 0xff);
   };
 
   switch (Opc) {
@@ -240,23 +243,47 @@ bool X86FixupInstTuningPass::processInstruction(
     // VMOVLHPS is always 128 bits.
   case X86::VMOVLHPSZrr:
   case X86::VUNPCKLPDZ128rr:
-    return ProcessUNPCKLPDrr(X86::VSHUFPSZ128rri);
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZ128rri);
   case X86::VUNPCKLPDZ256rr:
-    return ProcessUNPCKLPDrr(X86::VSHUFPSZ256rri);
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZ256rri);
   case X86::VUNPCKLPDZrr:
-    return ProcessUNPCKLPDrr(X86::VSHUFPSZrri);
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZrri);
+  case X86::VUNPCKLPDZ128rrk:
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZ128rrik);
+  case X86::VUNPCKLPDZ256rrk:
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZ256rrik);
+  case X86::VUNPCKLPDZrrk:
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZrrik);
+  case X86::VUNPCKLPDZ128rrkz:
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZ128rrikz);
+  case X86::VUNPCKLPDZ256rrkz:
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZ256rrikz);
+  case X86::VUNPCKLPDZrrkz:
+    return ProcessUNPCKLPDrr(X86::VSHUFPDZrrikz);
   case X86::UNPCKHPDrr:
-    return ProcessUNPCKHPDrr(X86::SHUFPSrri);
+    return ProcessUNPCKHPDrr(X86::SHUFPDrri);
   case X86::VUNPCKHPDrr:
-    return ProcessUNPCKHPDrr(X86::VSHUFPSrri);
+    return ProcessUNPCKHPDrr(X86::VSHUFPDrri);
   case X86::VUNPCKHPDYrr:
-    return ProcessUNPCKHPDrr(X86::VSHUFPSYrri);
+    return ProcessUNPCKHPDrr(X86::VSHUFPDYrri);
   case X86::VUNPCKHPDZ128rr:
-    return ProcessUNPCKHPDrr(X86::VSHUFPSZ128rri);
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZ128rri);
   case X86::VUNPCKHPDZ256rr:
-    return ProcessUNPCKHPDrr(X86::VSHUFPSZ256rri);
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZ256rri);
   case X86::VUNPCKHPDZrr:
-    return ProcessUNPCKHPDrr(X86::VSHUFPSZrri);
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZrri);
+  case X86::VUNPCKHPDZ128rrk:
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZ128rrik);
+  case X86::VUNPCKHPDZ256rrk:
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZ256rrik);
+  case X86::VUNPCKHPDZrrk:
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZrrik);
+  case X86::VUNPCKHPDZ128rrkz:
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZ128rrikz);
+  case X86::VUNPCKHPDZ256rrkz:
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZ256rrikz);
+  case X86::VUNPCKHPDZrrkz:
+    return ProcessUNPCKHPDrr(X86::VSHUFPDZrrikz);
   default:
     return false;
   }

diff  --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
index 8ab1ab5c8a3db..8e3ef2e8e8f8f 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
@@ -32,7 +32,7 @@ define <8 x float> @transform_VUNPCKLPDYrr(<8 x float> %a, <8 x float> %b) nounw
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKLPDYrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1],ymm0[4,5],ymm1[4,5]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; CHECK-ICX-NEXT:    retq
 ;
 ; CHECK-V4-LABEL: transform_VUNPCKLPDYrr:
@@ -61,7 +61,7 @@ define <8 x float> @transform_VUNPCKHPDYrr(<8 x float> %a, <8 x float> %b) nounw
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKHPDYrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3],ymm0[6,7],ymm1[6,7]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
 ; CHECK-ICX-NEXT:    retq
 ;
 ; CHECK-V4-LABEL: transform_VUNPCKHPDYrr:
@@ -90,7 +90,7 @@ define <4 x float> @transform_VUNPCKLPDrr(<4 x float> %a, <4 x float> %b) nounwi
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKLPDrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-ICX-NEXT:    retq
 ;
 ; CHECK-V4-LABEL: transform_VUNPCKLPDrr:
@@ -119,7 +119,7 @@ define <4 x float> @transform_VUNPCKHPDrr(<4 x float> %a, <4 x float> %b) nounwi
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKHPDrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,3]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-ICX-NEXT:    retq
 ;
 ; CHECK-V4-LABEL: transform_VUNPCKHPDrr:
@@ -164,52 +164,144 @@ define <8 x double> @transform_VUNPCKHPDZrrkz(<8 x double> %a, <8 x double> %b,
   ret <8 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <4 x double> @transform_VUNPCKLPDYrrkz(<4 x double> %a, <4 x double> %b, i4 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKLPDYrrkz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKLPDYrrkz:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKLPDYrrkz:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKLPDYrrkz:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKLPDYrrkz:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKLPDYrrkz:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i4 %mask_int to <4 x i1>
   %shufp = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   %res = select <4 x i1> %mask, <4 x double> %shufp, <4 x double> zeroinitializer
   ret <4 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <4 x double> @transform_VUNPCKHPDYrrkz(<4 x double> %a, <4 x double> %b, i4 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKHPDYrrkz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKHPDYrrkz:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKHPDYrrkz:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKHPDYrrkz:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKHPDYrrkz:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKHPDYrrkz:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i4 %mask_int to <4 x i1>
   %shufp = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   %res = select <4 x i1> %mask, <4 x double> %shufp, <4 x double> zeroinitializer
   ret <4 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <2 x double> @transform_VUNPCKLPDrrkz(<2 x double> %a, <2 x double> %b, i2 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKLPDrrkz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKLPDrrkz:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKLPDrrkz:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKLPDrrkz:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKLPDrrkz:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKLPDrrkz:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i2 %mask_int to <2 x i1>
   %shufp = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
   %res = select <2 x i1> %mask, <2 x double> %shufp, <2 x double> zeroinitializer
   ret <2 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <2 x double> @transform_VUNPCKHPDrrkz(<2 x double> %a, <2 x double> %b, i2 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKHPDrrkz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKHPDrrkz:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKHPDrrkz:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKHPDrrkz:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKHPDrrkz:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKHPDrrkz:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i2 %mask_int to <2 x i1>
   %shufp = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
   %res = select <2 x i1> %mask, <2 x double> %shufp, <2 x double> zeroinitializer
@@ -242,56 +334,164 @@ define <8 x double> @transform_VUNPCKHPDZrrk(<8 x double> %a, <8 x double> %b, <
   ret <8 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <4 x double> @transform_VUNPCKLPDYrrk(<4 x double> %a, <4 x double> %b, <4 x double> %c, i4 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKLPDYrrk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
-; CHECK-NEXT:    vmovapd %ymm2, %ymm0
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKLPDYrrk:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-SKX-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKLPDYrrk:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-ICX-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKLPDYrrk:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-V4-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKLPDYrrk:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-AVX512-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKLPDYrrk:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
+; CHECK-ZNVER4-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i4 %mask_int to <4 x i1>
   %shufp = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   %res = select <4 x i1> %mask, <4 x double> %shufp, <4 x double> %c
   ret <4 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <4 x double> @transform_VUNPCKHPDYrrk(<4 x double> %a, <4 x double> %b, <4 x double> %c, i4 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKHPDYrrk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
-; CHECK-NEXT:    vmovapd %ymm2, %ymm0
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKHPDYrrk:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-SKX-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKHPDYrrk:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-ICX-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKHPDYrrk:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-V4-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKHPDYrrk:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-AVX512-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKHPDYrrk:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
+; CHECK-ZNVER4-NEXT:    vmovapd %ymm2, %ymm0
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i4 %mask_int to <4 x i1>
   %shufp = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
   %res = select <4 x i1> %mask, <4 x double> %shufp, <4 x double> %c
   ret <4 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <2 x double> @transform_VUNPCKLPDrrk(<2 x double> %a, <2 x double> %b, <2 x double> %c, i2 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKLPDrrk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
-; CHECK-NEXT:    vmovapd %xmm2, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKLPDrrk:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; CHECK-SKX-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKLPDrrk:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; CHECK-ICX-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKLPDrrk:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; CHECK-V4-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKLPDrrk:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; CHECK-AVX512-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKLPDrrk:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
+; CHECK-ZNVER4-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i2 %mask_int to <2 x i1>
   %shufp = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 2>
   %res = select <2 x i1> %mask, <2 x double> %shufp, <2 x double> %c
   ret <2 x double> %res
 }
 
-; Check that masked vunpcklpd will not be transformed into vshufps.
 define <2 x double> @transform_VUNPCKHPDrrk(<2 x double> %a, <2 x double> %b, <2 x double> %c, i2 %mask_int) nounwind {
-; CHECK-LABEL: transform_VUNPCKHPDrrk:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kmovd %edi, %k1
-; CHECK-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
-; CHECK-NEXT:    vmovapd %xmm2, %xmm0
-; CHECK-NEXT:    retq
+; CHECK-SKX-LABEL: transform_VUNPCKHPDrrk:
+; CHECK-SKX:       # %bb.0:
+; CHECK-SKX-NEXT:    kmovd %edi, %k1
+; CHECK-SKX-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; CHECK-SKX-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-SKX-NEXT:    retq
+;
+; CHECK-ICX-LABEL: transform_VUNPCKHPDrrk:
+; CHECK-ICX:       # %bb.0:
+; CHECK-ICX-NEXT:    kmovd %edi, %k1
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; CHECK-ICX-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-ICX-NEXT:    retq
+;
+; CHECK-V4-LABEL: transform_VUNPCKHPDrrk:
+; CHECK-V4:       # %bb.0:
+; CHECK-V4-NEXT:    kmovd %edi, %k1
+; CHECK-V4-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; CHECK-V4-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-V4-NEXT:    retq
+;
+; CHECK-AVX512-LABEL: transform_VUNPCKHPDrrk:
+; CHECK-AVX512:       # %bb.0:
+; CHECK-AVX512-NEXT:    kmovd %edi, %k1
+; CHECK-AVX512-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; CHECK-AVX512-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-AVX512-NEXT:    retq
+;
+; CHECK-ZNVER4-LABEL: transform_VUNPCKHPDrrk:
+; CHECK-ZNVER4:       # %bb.0:
+; CHECK-ZNVER4-NEXT:    kmovd %edi, %k1
+; CHECK-ZNVER4-NEXT:    vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
+; CHECK-ZNVER4-NEXT:    vmovapd %xmm2, %xmm0
+; CHECK-ZNVER4-NEXT:    retq
   %mask = bitcast i2 %mask_int to <2 x i1>
   %shufp = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 1, i32 3>
   %res = select <2 x i1> %mask, <2 x double> %shufp, <2 x double> %c

diff  --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd.ll
index 9de90fd8da4a9..8fb134c8ce4f8 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd.ll
@@ -11,7 +11,7 @@ define <8 x float> @transform_VUNPCKLPDYrr(<8 x float> %a, <8 x float> %b) nounw
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKLPDYrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,1],ymm1[0,1],ymm0[4,5],ymm1[4,5]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
 ; CHECK-ICX-NEXT:    retq
   %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
   ret <8 x float> %shufp
@@ -25,7 +25,7 @@ define <8 x float> @transform_VUNPCKHPDYrr(<8 x float> %a, <8 x float> %b) nounw
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKHPDYrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3],ymm0[6,7],ymm1[6,7]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
 ; CHECK-ICX-NEXT:    retq
   %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
   ret <8 x float> %shufp
@@ -39,7 +39,7 @@ define <4 x float> @transform_VUNPCKLPDrr(<4 x float> %a, <4 x float> %b) nounwi
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKLPDrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,1]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
 ; CHECK-ICX-NEXT:    retq
   %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
   ret <4 x float> %shufp
@@ -53,7 +53,7 @@ define <4 x float> @transform_VUNPCKHPDrr(<4 x float> %a, <4 x float> %b) nounwi
 ;
 ; CHECK-ICX-LABEL: transform_VUNPCKHPDrr:
 ; CHECK-ICX:       # %bb.0:
-; CHECK-ICX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,3]
+; CHECK-ICX-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
 ; CHECK-ICX-NEXT:    retq
   %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
   ret <4 x float> %shufp


        

