[llvm] ce63cd3 - [DAG] Fold freeze(concat_vectors(x,y,...)) -> concat_vectors(freeze(x),freeze(y),...)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 8 12:27:14 PST 2023


Author: Simon Pilgrim
Date: 2023-02-08T20:26:43Z
New Revision: ce63cd3bf1df5e9940c1c5e691029830e5041332

URL: https://github.com/llvm/llvm-project/commit/ce63cd3bf1df5e9940c1c5e691029830e5041332
DIFF: https://github.com/llvm/llvm-project/commit/ce63cd3bf1df5e9940c1c5e691029830e5041332.diff

LOG: [DAG] Fold freeze(concat_vectors(x,y,...)) -> concat_vectors(freeze(x),freeze(y),...)

Another of the cleanups necessary for D136529

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/test/CodeGen/X86/avx512-broadcast-arith.ll
    llvm/test/CodeGen/X86/midpoint-int-vec-512.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 58f2ba0af0cd..172c83024f43 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -14711,7 +14711,8 @@ SDValue DAGCombiner::visitFREEZE(SDNode *N) {
     return SDValue();
 
   bool AllowMultipleMaybePoisonOperands = N0.getOpcode() == ISD::BUILD_VECTOR ||
-                                          N0.getOpcode() == ISD::BUILD_PAIR;
+                                          N0.getOpcode() == ISD::BUILD_PAIR ||
+                                          N0.getOpcode() == ISD::CONCAT_VECTORS;
 
   SmallSetVector<SDValue, 8> MaybePoisonOperands;
   for (SDValue Op : N0->ops()) {

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 074b19b964a0..0cbca37e9342 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -4758,6 +4758,7 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
   case ISD::AssertSext:
   case ISD::AssertZext:
   case ISD::FREEZE:
+  case ISD::CONCAT_VECTORS:
   case ISD::INSERT_SUBVECTOR:
   case ISD::AND:
   case ISD::OR:

diff  --git a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll
index 22fff49103c2..13d1265a249d 100644
--- a/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-broadcast-arith.ll
@@ -30,15 +30,14 @@ define <64 x i8> @add_v64i8_broadcasts(<64 x i8> %a0, i64 %a1, i8 %a2) {
 ; AVX512F-NEXT:    vinserti128 $1, %xmm4, %ymm3, %ymm3
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT:    vpaddb %ymm1, %ymm3, %ymm3
+; AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm3
+; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm3, %zmm0, %zmm3
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm3, %zmm4
 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpaddb %ymm1, %ymm3, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
-; AVX512F-NEXT:    vpternlogq $226, %zmm3, %zmm2, %zmm0
+; AVX512F-NEXT:    vpternlogq $226, %zmm4, %zmm2, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512BW-LABEL: add_v64i8_broadcasts:

diff  --git a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
index b032ebf00908..364b3612410c 100644
--- a/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
+++ b/llvm/test/CodeGen/X86/midpoint-int-vec-512.ll
@@ -283,17 +283,16 @@ define <32 x i16> @vec512_i16_signed_reg_reg(<32 x i16> %a1, <32 x i16> %a2) nou
 ; AVX512F-NEXT:    vpsubw %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsrlw $1, %ymm2, %ymm2
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512F-NEXT:    vpsubw %ymm2, %ymm6, %ymm2
+; AVX512F-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512F-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm1
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT:    vpsubw %ymm2, %ymm5, %ymm2
-; AVX512F-NEXT:    vpsubw %ymm1, %ymm5, %ymm5
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm5, %zmm2
-; AVX512F-NEXT:    vpternlogq $226, %zmm1, %zmm4, %zmm2
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512F-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_reg:
@@ -311,17 +310,16 @@ define <32 x i16> @vec512_i16_signed_reg_reg(<32 x i16> %a1, <32 x i16> %a2) nou
 ; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm5, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512VL-FALLBACK-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm2, %ymm6, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm1
 ; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-FALLBACK-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm2, %ymm5, %ymm2
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm5, %zmm2
-; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm1, %zmm4, %zmm2
-; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    retq
 ;
 ; AVX512BW-LABEL: vec512_i16_signed_reg_reg:
@@ -362,17 +360,16 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n
 ; AVX512F-NEXT:    vpsubw %ymm6, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsrlw $1, %ymm2, %ymm2
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm4
+; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512F-NEXT:    vpsubw %ymm2, %ymm6, %ymm2
+; AVX512F-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512F-NEXT:    vpternlogq $184, %zmm4, %zmm5, %zmm1
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512F-NEXT:    vpsubw %ymm2, %ymm4, %ymm2
-; AVX512F-NEXT:    vpsubw %ymm1, %ymm4, %ymm4
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm4, %zmm2
-; AVX512F-NEXT:    vpternlogq $216, %zmm5, %zmm1, %zmm2
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512F-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i16_unsigned_reg_reg:
@@ -390,17 +387,16 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n
 ; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm6, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm4
+; AVX512VL-FALLBACK-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm2, %ymm6, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VL-FALLBACK-NEXT:    vpternlogq $184, %zmm4, %zmm5, %zmm1
 ; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-FALLBACK-NEXT:    vpxor %xmm4, %xmm4, %xmm4
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm2, %ymm4, %ymm2
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm4, %ymm4
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm4, %zmm2
-; AVX512VL-FALLBACK-NEXT:    vpternlogq $216, %zmm5, %zmm1, %zmm2
-; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    retq
 ;
 ; AVX512BW-LABEL: vec512_i16_unsigned_reg_reg:
@@ -444,17 +440,16 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw
 ; AVX512F-NEXT:    vpsubw %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsrlw $1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm5
+; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512F-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
+; AVX512F-NEXT:    vpsubw %ymm0, %ymm6, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm0
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT:    vpsubw %ymm1, %ymm5, %ymm1
-; AVX512F-NEXT:    vpsubw %ymm0, %ymm5, %ymm5
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm1
-; AVX512F-NEXT:    vpternlogq $226, %zmm0, %zmm4, %zmm1
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
-; AVX512F-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512F-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_mem_reg:
@@ -473,17 +468,16 @@ define <32 x i16> @vec512_i16_signed_mem_reg(ptr %a1_addr, <32 x i16> %a2) nounw
 ; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm5, %ymm0, %ymm0
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm0, %ymm0
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm5
+; AVX512VL-FALLBACK-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm0, %ymm6, %ymm0
 ; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm5, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm0, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm1
-; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm0, %zmm4, %zmm1
-; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    retq
 ;
 ; AVX512BW-LABEL: vec512_i16_signed_mem_reg:
@@ -527,17 +521,16 @@ define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounw
 ; AVX512F-NEXT:    vpsubw %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
 ; AVX512F-NEXT:    vpsrlw $1, %ymm2, %ymm2
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512F-NEXT:    vpsubw %ymm2, %ymm6, %ymm2
+; AVX512F-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512F-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm1
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT:    vpsubw %ymm2, %ymm5, %ymm2
-; AVX512F-NEXT:    vpsubw %ymm1, %ymm5, %ymm5
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm5, %zmm2
-; AVX512F-NEXT:    vpternlogq $226, %zmm1, %zmm4, %zmm2
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512F-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
+; AVX512F-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_reg_mem:
@@ -556,17 +549,16 @@ define <32 x i16> @vec512_i16_signed_reg_mem(<32 x i16> %a1, ptr %a2_addr) nounw
 ; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm5, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm1, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm5
+; AVX512VL-FALLBACK-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm2, %ymm6, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
 ; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm1, %zmm1
+; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm1
 ; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
-; AVX512VL-FALLBACK-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm2, %ymm5, %ymm2
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm5, %zmm2
-; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm1, %zmm4, %zmm2
-; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm2, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    retq
 ;
 ; AVX512BW-LABEL: vec512_i16_signed_reg_mem:
@@ -611,17 +603,16 @@ define <32 x i16> @vec512_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwin
 ; AVX512F-NEXT:    vpsubw %ymm5, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsrlw $1, %ymm0, %ymm0
 ; AVX512F-NEXT:    vpsrlw $1, %ymm1, %ymm1
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm5
+; AVX512F-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512F-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
+; AVX512F-NEXT:    vpsubw %ymm0, %ymm6, %ymm0
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm0
 ; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512F-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512F-NEXT:    vpsubw %ymm1, %ymm5, %ymm1
-; AVX512F-NEXT:    vpsubw %ymm0, %ymm5, %ymm5
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm1
-; AVX512F-NEXT:    vpternlogq $226, %zmm0, %zmm4, %zmm1
-; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
-; AVX512F-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
-; AVX512F-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512F-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-FALLBACK-LABEL: vec512_i16_signed_mem_mem:
@@ -641,17 +632,16 @@ define <32 x i16> @vec512_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwin
 ; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm5, %ymm0, %ymm0
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm0, %ymm0
 ; AVX512VL-FALLBACK-NEXT:    vpsrlw $1, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm5
+; AVX512VL-FALLBACK-NEXT:    vpxor %xmm6, %xmm6, %xmm6
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm6, %ymm1
+; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm0, %ymm6, %ymm0
 ; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm5, %zmm4, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpxor %xmm5, %xmm5, %xmm5
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm1, %ymm5, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vpsubw %ymm0, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm5, %zmm1
-; AVX512VL-FALLBACK-NEXT:    vpternlogq $226, %zmm0, %zmm4, %zmm1
-; AVX512VL-FALLBACK-NEXT:    vextracti64x4 $1, %zmm1, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm0, %ymm0
-; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm3, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT:    vpaddw %ymm2, %ymm0, %ymm0
+; AVX512VL-FALLBACK-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-FALLBACK-NEXT:    retq
 ;
 ; AVX512BW-LABEL: vec512_i16_signed_mem_mem:


        


More information about the llvm-commits mailing list