[llvm] r365785 - [DAGCombine] narrowInsertExtractVectorBinOp - add CONCAT_VECTORS support
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 11 07:45:04 PDT 2019
Author: rksimon
Date: Thu Jul 11 07:45:03 2019
New Revision: 365785
URL: http://llvm.org/viewvc/llvm-project?rev=365785&view=rev
Log:
[DAGCombine] narrowInsertExtractVectorBinOp - add CONCAT_VECTORS support
We already split extract_subvector(binop(insert_subvector(v,x),insert_subvector(w,y))) -> binop(x,y).
This patch adds support for extract_subvector(binop(concat_vectors(),concat_vectors())) cases as well.
In particular, this means we don't have to wait for X86 lowering to convert concat_vectors into insert_subvector chains, which helps avoid some cases where demandedelts/combine calls occur too late to split large vector ops.
The fast-isel-store.ll load folding regression is annoying, but I don't think it's that critical.
Differential Revision: https://reviews.llvm.org/D63653
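For readers unfamiliar with the index bookkeeping in the new CONCAT_VECTORS path, here is a minimal standalone C++ sketch of just the operand-selection arithmetic: the extract index must be a multiple of the subvector width, and dividing by that width picks which concat operand feeds the narrowed binop. This is illustrative only (the function name is made up, and the real code also checks that the concat operand type matches the extracted type); it is not LLVM API.

// Standalone sketch of the CONCAT_VECTORS operand selection used by
// GetSubVector in this patch. Given the extract index and the number of
// elements in the extracted (and concatenated) subvector type, return which
// concat operand supplies that subvector, or -1 if the extract does not
// start on a subvector boundary.
#include <cassert>
#include <cstdint>

static int64_t concatOperandForExtract(uint64_t ExtractIdx,
                                       unsigned NumSubElts) {
  // Only a whole-subvector extract can be narrowed this way.
  if (ExtractIdx % NumSubElts != 0)
    return -1;
  // Each concat operand covers NumSubElts elements, so integer division
  // picks the operand holding the extracted range.
  return static_cast<int64_t>(ExtractIdx / NumSubElts);
}

int main() {
  // e.g. a v8f64 concat of four v2f64 operands: extracting elements [4,6)
  // maps to operand 2, while an extract starting at element 3 is misaligned.
  assert(concatOperandForExtract(4, 2) == 2);
  assert(concatOperandForExtract(3, 2) == -1);
  return 0;
}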
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
llvm/trunk/test/CodeGen/X86/fast-isel-store.ll
llvm/trunk/test/CodeGen/X86/machine-combiner-int-vec.ll
llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
llvm/trunk/test/CodeGen/X86/vec_saddo.ll
llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jul 11 07:45:03 2019
@@ -18002,11 +18002,21 @@ static SDValue narrowInsertExtractVector
SDValue Index = Extract->getOperand(1);
EVT VT = Extract->getValueType(0);
+ // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+ // if the source subvector is the same type as the one being extracted.
auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
- if (V.getOpcode() != ISD::INSERT_SUBVECTOR ||
- V.getOperand(1).getValueType() != VT || V.getOperand(2) != Index)
- return SDValue();
- return V.getOperand(1);
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == VT &&
+ (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
};
SDValue Sub0 = GetSubVector(Bop0);
SDValue Sub1 = GetSubVector(Bop1);
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmax_legacy.f16.ll Thu Jul 11 07:45:03 2019
@@ -153,8 +153,8 @@ define <3 x half> @test_fmax_legacy_ugt_
; GFX9-NNAN-LABEL: test_fmax_legacy_ugt_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_max_f16 v1, v1, v3
+; GFX9-NNAN-NEXT: v_pk_max_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmax_legacy_ugt_v3f16:
Modified: llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/fmin_legacy.f16.ll Thu Jul 11 07:45:03 2019
@@ -154,8 +154,8 @@ define <3 x half> @test_fmin_legacy_ule_
; GFX9-NNAN-LABEL: test_fmin_legacy_ule_v3f16:
; GFX9-NNAN: ; %bb.0:
; GFX9-NNAN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: v_pk_min_f16 v1, v1, v3
+; GFX9-NNAN-NEXT: v_pk_min_f16 v0, v0, v2
; GFX9-NNAN-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: test_fmin_legacy_ule_v3f16:
Modified: llvm/trunk/test/CodeGen/X86/fast-isel-store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fast-isel-store.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fast-isel-store.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fast-isel-store.ll Thu Jul 11 07:45:03 2019
@@ -336,9 +336,9 @@ define <8 x float> @test_store_8xf32_ali
define <4 x double> @test_store_4xf64(<4 x double>* nocapture %addr, <4 x double> %value, <4 x double> %value2) {
; SSE32-LABEL: test_store_4xf64:
; SSE32: # %bb.0:
-; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: addpd %xmm2, %xmm0
; SSE32-NEXT: movupd %xmm0, (%rdi)
+; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: movupd %xmm1, 16(%rdi)
; SSE32-NEXT: retq
;
@@ -346,10 +346,11 @@ define <4 x double> @test_store_4xf64(<4
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd %xmm2, %xmm0
; SSE64-NEXT: movupd %xmm0, (%eax)
+; SSE64-NEXT: addpd %xmm3, %xmm1
; SSE64-NEXT: movupd %xmm1, 16(%eax)
; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 4
@@ -375,9 +376,9 @@ define <4 x double> @test_store_4xf64(<4
define <4 x double> @test_store_4xf64_aligned(<4 x double>* nocapture %addr, <4 x double> %value, <4 x double> %value2) {
; SSE32-LABEL: test_store_4xf64_aligned:
; SSE32: # %bb.0:
-; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: addpd %xmm2, %xmm0
; SSE32-NEXT: movapd %xmm0, (%rdi)
+; SSE32-NEXT: addpd %xmm3, %xmm1
; SSE32-NEXT: movapd %xmm1, 16(%rdi)
; SSE32-NEXT: retq
;
@@ -385,10 +386,11 @@ define <4 x double> @test_store_4xf64_al
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd %xmm2, %xmm0
; SSE64-NEXT: movapd %xmm0, (%eax)
+; SSE64-NEXT: addpd %xmm3, %xmm1
; SSE64-NEXT: movapd %xmm1, 16(%eax)
; SSE64-NEXT: addl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 4
@@ -614,13 +616,13 @@ define <16 x float> @test_store_16xf32_a
define <8 x double> @test_store_8xf64(<8 x double>* nocapture %addr, <8 x double> %value, <8 x double> %value2) {
; SSE32-LABEL: test_store_8xf64:
; SSE32: # %bb.0:
-; SSE32-NEXT: addpd %xmm7, %xmm3
-; SSE32-NEXT: addpd %xmm6, %xmm2
-; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: addpd %xmm4, %xmm0
; SSE32-NEXT: movupd %xmm0, (%rdi)
+; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: movupd %xmm1, 16(%rdi)
+; SSE32-NEXT: addpd %xmm6, %xmm2
; SSE32-NEXT: movupd %xmm2, 32(%rdi)
+; SSE32-NEXT: addpd %xmm7, %xmm3
; SSE32-NEXT: movupd %xmm3, 48(%rdi)
; SSE32-NEXT: retq
;
@@ -628,14 +630,17 @@ define <8 x double> @test_store_8xf64(<8
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm4
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm5
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm6
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
+; SSE64-NEXT: addpd %xmm4, %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm3
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm2
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm0
; SSE64-NEXT: movupd %xmm0, (%eax)
+; SSE64-NEXT: addpd %xmm6, %xmm1
; SSE64-NEXT: movupd %xmm1, 16(%eax)
+; SSE64-NEXT: addpd %xmm5, %xmm2
; SSE64-NEXT: movupd %xmm2, 32(%eax)
; SSE64-NEXT: movupd %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp
@@ -644,9 +649,9 @@ define <8 x double> @test_store_8xf64(<8
;
; AVXONLY32-LABEL: test_store_8xf64:
; AVXONLY32: # %bb.0:
-; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY32-NEXT: vmovupd %ymm0, (%rdi)
+; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vmovupd %ymm1, 32(%rdi)
; AVXONLY32-NEXT: retq
;
@@ -659,10 +664,11 @@ define <8 x double> @test_store_8xf64(<8
; AVXONLY64-NEXT: .cfi_def_cfa_register %ebp
; AVXONLY64-NEXT: andl $-32, %esp
; AVXONLY64-NEXT: subl $32, %esp
+; AVXONLY64-NEXT: vmovapd 40(%ebp), %ymm3
; AVXONLY64-NEXT: movl 8(%ebp), %eax
-; AVXONLY64-NEXT: vaddpd 40(%ebp), %ymm1, %ymm1
; AVXONLY64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY64-NEXT: vmovupd %ymm0, (%eax)
+; AVXONLY64-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY64-NEXT: vmovupd %ymm1, 32(%eax)
; AVXONLY64-NEXT: movl %ebp, %esp
; AVXONLY64-NEXT: popl %ebp
@@ -689,13 +695,13 @@ define <8 x double> @test_store_8xf64(<8
define <8 x double> @test_store_8xf64_aligned(<8 x double>* nocapture %addr, <8 x double> %value, <8 x double> %value2) {
; SSE32-LABEL: test_store_8xf64_aligned:
; SSE32: # %bb.0:
-; SSE32-NEXT: addpd %xmm7, %xmm3
-; SSE32-NEXT: addpd %xmm6, %xmm2
-; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: addpd %xmm4, %xmm0
; SSE32-NEXT: movapd %xmm0, (%rdi)
+; SSE32-NEXT: addpd %xmm5, %xmm1
; SSE32-NEXT: movapd %xmm1, 16(%rdi)
+; SSE32-NEXT: addpd %xmm6, %xmm2
; SSE32-NEXT: movapd %xmm2, 32(%rdi)
+; SSE32-NEXT: addpd %xmm7, %xmm3
; SSE32-NEXT: movapd %xmm3, 48(%rdi)
; SSE32-NEXT: retq
;
@@ -703,14 +709,17 @@ define <8 x double> @test_store_8xf64_al
; SSE64: # %bb.0:
; SSE64-NEXT: subl $12, %esp
; SSE64-NEXT: .cfi_def_cfa_offset 16
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm4
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm5
+; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm6
; SSE64-NEXT: movapd {{[0-9]+}}(%esp), %xmm3
+; SSE64-NEXT: addpd %xmm4, %xmm3
; SSE64-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm3
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm2
-; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm1
; SSE64-NEXT: addpd {{[0-9]+}}(%esp), %xmm0
; SSE64-NEXT: movapd %xmm0, (%eax)
+; SSE64-NEXT: addpd %xmm6, %xmm1
; SSE64-NEXT: movapd %xmm1, 16(%eax)
+; SSE64-NEXT: addpd %xmm5, %xmm2
; SSE64-NEXT: movapd %xmm2, 32(%eax)
; SSE64-NEXT: movapd %xmm3, 48(%eax)
; SSE64-NEXT: addl $12, %esp
@@ -719,9 +728,9 @@ define <8 x double> @test_store_8xf64_al
;
; AVXONLY32-LABEL: test_store_8xf64_aligned:
; AVXONLY32: # %bb.0:
-; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY32-NEXT: vmovapd %ymm0, (%rdi)
+; AVXONLY32-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY32-NEXT: vmovapd %ymm1, 32(%rdi)
; AVXONLY32-NEXT: retq
;
@@ -734,10 +743,11 @@ define <8 x double> @test_store_8xf64_al
; AVXONLY64-NEXT: .cfi_def_cfa_register %ebp
; AVXONLY64-NEXT: andl $-32, %esp
; AVXONLY64-NEXT: subl $32, %esp
+; AVXONLY64-NEXT: vmovapd 40(%ebp), %ymm3
; AVXONLY64-NEXT: movl 8(%ebp), %eax
-; AVXONLY64-NEXT: vaddpd 40(%ebp), %ymm1, %ymm1
; AVXONLY64-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVXONLY64-NEXT: vmovapd %ymm0, (%eax)
+; AVXONLY64-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVXONLY64-NEXT: vmovapd %ymm1, 32(%eax)
; AVXONLY64-NEXT: movl %ebp, %esp
; AVXONLY64-NEXT: popl %ebp
Modified: llvm/trunk/test/CodeGen/X86/machine-combiner-int-vec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/machine-combiner-int-vec.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/machine-combiner-int-vec.ll (original)
+++ llvm/trunk/test/CodeGen/X86/machine-combiner-int-vec.ll Thu Jul 11 07:45:03 2019
@@ -73,10 +73,10 @@ define <4 x i32> @reassociate_xor_v4i32(
define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
; SSE-LABEL: reassociate_and_v8i32:
; SSE: # %bb.0:
-; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pand %xmm6, %xmm4
; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: pand %xmm7, %xmm5
; SSE-NEXT: pand %xmm5, %xmm1
; SSE-NEXT: retq
@@ -97,10 +97,10 @@ define <8 x i32> @reassociate_and_v8i32(
define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
; SSE-LABEL: reassociate_or_v8i32:
; SSE: # %bb.0:
-; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: por %xmm6, %xmm4
; SSE-NEXT: por %xmm4, %xmm0
+; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: por %xmm7, %xmm5
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: retq
@@ -121,10 +121,10 @@ define <8 x i32> @reassociate_or_v8i32(<
define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, <8 x i32> %x3) {
; SSE-LABEL: reassociate_xor_v8i32:
; SSE: # %bb.0:
-; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pxor %xmm6, %xmm4
; SSE-NEXT: pxor %xmm4, %xmm0
+; SSE-NEXT: paddd %xmm3, %xmm1
; SSE-NEXT: pxor %xmm7, %xmm5
; SSE-NEXT: pxor %xmm5, %xmm1
; SSE-NEXT: retq
@@ -164,10 +164,10 @@ define <16 x i32> @reassociate_and_v16i3
;
; AVX2-LABEL: reassociate_and_v16i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpand %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpand %ymm7, %ymm5, %ymm2
; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -204,10 +204,10 @@ define <16 x i32> @reassociate_or_v16i32
;
; AVX2-LABEL: reassociate_or_v16i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpor %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpor %ymm7, %ymm5, %ymm2
; AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
@@ -244,10 +244,10 @@ define <16 x i32> @reassociate_xor_v16i3
;
; AVX2-LABEL: reassociate_xor_v16i32:
; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpxor %ymm6, %ymm4, %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpxor %ymm7, %ymm5, %ymm2
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/nontemporal-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/nontemporal-2.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/nontemporal-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/nontemporal-2.ll Thu Jul 11 07:45:03 2019
@@ -1234,9 +1234,9 @@ define void @test_op_v32i8(<32 x i8> %a,
define void @test_unaligned_v8f32(<8 x float> %a, <8 x float> %b, <8 x float>* %dst) {
; SSE-LABEL: test_unaligned_v8f32:
; SSE: # %bb.0:
-; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: movntps %xmm1, 16(%rdi)
+; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: movntps %xmm0, (%rdi)
; SSE-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/vec_saddo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_saddo.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_saddo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_saddo.ll Thu Jul 11 07:45:03 2019
@@ -807,50 +807,46 @@ define <16 x i32> @saddo_v16i32(<16 x i3
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
; AVX1-NEXT: vpcmpgtd %xmm7, %xmm5, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm10
-; AVX1-NEXT: vpcmpeqd %xmm8, %xmm10, %xmm8
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm11
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm4
; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqd %xmm11, %xmm4, %xmm11
-; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm11, %ymm11
+; AVX1-NEXT: vpcmpeqd %xmm8, %xmm4, %xmm8
; AVX1-NEXT: vpaddd %xmm9, %xmm7, %xmm9
; AVX1-NEXT: vpcmpgtd %xmm9, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT: vpcmpeqd %xmm7, %xmm10, %xmm7
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm4
+; AVX1-NEXT: vpandn %xmm8, %xmm4, %xmm8
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm5, %xmm7
+; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm4
+; AVX1-NEXT: vpxor %xmm6, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm7
; AVX1-NEXT: vpaddd %xmm3, %xmm1, %xmm10
; AVX1-NEXT: vpcmpgtd %xmm10, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm4, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm1, %ymm1
-; AVX1-NEXT: vandnps %ymm11, %ymm1, %ymm1
-; AVX1-NEXT: vpandn %xmm8, %xmm7, %xmm4
-; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm8
+; AVX1-NEXT: vpandn %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vpackssdw %xmm8, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT: vpcmpgtd %xmm1, %xmm5, %xmm3
; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm11
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm7
-; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm12
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm5, %xmm7
-; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
-; AVX1-NEXT: vpcmpeqd %xmm12, %xmm7, %xmm12
-; AVX1-NEXT: vinsertf128 $1, %xmm11, %ymm12, %ymm12
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm7
; AVX1-NEXT: vpaddd %xmm4, %xmm1, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm1
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpandn %xmm7, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm3
+; AVX1-NEXT: vpxor %xmm6, %xmm3, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm5, %xmm7
+; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm7, %xmm3
; AVX1-NEXT: vpaddd %xmm2, %xmm0, %xmm2
; AVX1-NEXT: vpcmpgtd %xmm2, %xmm5, %xmm0
; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm0
; AVX1-NEXT: vpcmpeqd %xmm0, %xmm7, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm12, %ymm0, %ymm0
-; AVX1-NEXT: vpandn %xmm11, %xmm1, %xmm1
+; AVX1-NEXT: vpandn %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm0
Modified: llvm/trunk/test/CodeGen/X86/vec_ssubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec_ssubo.ll?rev=365785&r1=365784&r2=365785&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec_ssubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec_ssubo.ll Thu Jul 11 07:45:03 2019
@@ -826,75 +826,71 @@ define <16 x i32> @ssubo_v16i32(<16 x i3
;
; AVX1-LABEL: ssubo_v16i32:
; AVX1: # %bb.0:
-; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm9
-; AVX1-NEXT: vpxor %xmm6, %xmm6, %xmm6
-; AVX1-NEXT: vpcmpgtd %xmm9, %xmm6, %xmm7
+; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
+; AVX1-NEXT: vpxor %xmm9, %xmm9, %xmm9
+; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm7
; AVX1-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm7
-; AVX1-NEXT: vpcmpgtd %xmm7, %xmm6, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm10
-; AVX1-NEXT: vpcmpeqd %xmm8, %xmm10, %xmm8
-; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm11
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm6, %xmm4
-; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
-; AVX1-NEXT: vpcmpeqd %xmm11, %xmm4, %xmm11
-; AVX1-NEXT: vinsertf128 $1, %xmm8, %ymm11, %ymm11
-; AVX1-NEXT: vpsubd %xmm9, %xmm7, %xmm9
-; AVX1-NEXT: vpcmpgtd %xmm9, %xmm6, %xmm7
+; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm8, %xmm6, %xmm8
+; AVX1-NEXT: vpsubd %xmm4, %xmm7, %xmm10
+; AVX1-NEXT: vpcmpgtd %xmm10, %xmm9, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT: vpcmpeqd %xmm7, %xmm10, %xmm7
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm6, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpandn %xmm6, %xmm8, %xmm6
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm9, %xmm7
; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm10
-; AVX1-NEXT: vpcmpgtd %xmm10, %xmm6, %xmm1
+; AVX1-NEXT: vpcmpgtd %xmm1, %xmm9, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm7, %xmm4, %xmm7
+; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpcmpgtd %xmm3, %xmm9, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
; AVX1-NEXT: vpcmpeqd %xmm1, %xmm4, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm7, %ymm1, %ymm1
-; AVX1-NEXT: vandnps %ymm1, %ymm11, %ymm1
-; AVX1-NEXT: vpandn %xmm7, %xmm8, %xmm4
-; AVX1-NEXT: vpackssdw %xmm4, %xmm1, %xmm8
+; AVX1-NEXT: vpandn %xmm1, %xmm7, %xmm1
+; AVX1-NEXT: vpackssdw %xmm6, %xmm1, %xmm8
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm4
-; AVX1-NEXT: vpcmpgtd %xmm4, %xmm6, %xmm7
-; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm6, %xmm3
-; AVX1-NEXT: vpxor %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm7, %xmm3, %xmm11
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm6, %xmm7
-; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm12
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm6, %xmm7
-; AVX1-NEXT: vpxor %xmm5, %xmm7, %xmm7
-; AVX1-NEXT: vpcmpeqd %xmm12, %xmm7, %xmm12
-; AVX1-NEXT: vinsertf128 $1, %xmm11, %ymm12, %ymm12
-; AVX1-NEXT: vpsubd %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpgtd %xmm4, %xmm6, %xmm1
+; AVX1-NEXT: vpcmpgtd %xmm4, %xmm9, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm7
+; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm1, %xmm6
+; AVX1-NEXT: vpsubd %xmm4, %xmm7, %xmm7
+; AVX1-NEXT: vpcmpgtd %xmm7, %xmm9, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vpxor %xmm5, %xmm1, %xmm1
+; AVX1-NEXT: vpandn %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm9, %xmm4
+; AVX1-NEXT: vpxor %xmm5, %xmm4, %xmm4
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm9, %xmm6
+; AVX1-NEXT: vpxor %xmm5, %xmm6, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm6, %xmm4
; AVX1-NEXT: vpsubd %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpcmpgtd %xmm2, %xmm6, %xmm0
+; AVX1-NEXT: vpcmpgtd %xmm2, %xmm9, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm7, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm0, %xmm6, %xmm0
; AVX1-NEXT: vpxor %xmm5, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandnps %ymm0, %ymm12, %ymm0
-; AVX1-NEXT: vpandn %xmm1, %xmm11, %xmm1
+; AVX1-NEXT: vpandn %xmm0, %xmm4, %xmm0
; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm8, %xmm0, %xmm1
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
-; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm1[2,3,0,1]
-; AVX1-NEXT: vpmovsxbd %xmm3, %xmm3
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[1,1,2,3]
+; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm4 = xmm1[2,3,0,1]
+; AVX1-NEXT: vpmovsxbd %xmm4, %xmm4
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[3,3,0,1]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm3, %ymm1
-; AVX1-NEXT: vmovdqa %xmm9, 48(%rdi)
-; AVX1-NEXT: vmovdqa %xmm10, 32(%rdi)
-; AVX1-NEXT: vmovdqa %xmm4, 16(%rdi)
+; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm4, %ymm1
+; AVX1-NEXT: vmovdqa %xmm10, 48(%rdi)
+; AVX1-NEXT: vmovdqa %xmm3, 32(%rdi)
+; AVX1-NEXT: vmovdqa %xmm7, 16(%rdi)
; AVX1-NEXT: vmovdqa %xmm2, (%rdi)
; AVX1-NEXT: retq
;