[llvm] r321969 - [DAG] Fix for Bug PR34620 - Allow SimplifyDemandedBits to look through bitcasts
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 7 11:09:40 PST 2018
Author: rksimon
Date: Sun Jan 7 11:09:40 2018
New Revision: 321969
URL: http://llvm.org/viewvc/llvm-project?rev=321969&view=rev
Log:
[DAG] Fix for Bug PR34620 - Allow SimplifyDemandedBits to look through bitcasts
Allow SimplifyDemandedBits to use TargetLoweringOpt::computeKnownBits to look through bitcasts. This can help simplifying in some cases where bitcasts of constants generated during or after legalization can't be folded away, and thus didn't get picked up by SimplifyDemandedBits. This fixes PR34620, where a redundant pand created during legalization from lowering an lshr <16 x i8> wasn't being simplified due to the presence of a bitcasted build_vector as an operand.
Committed on the behalf of @sameconrad (Sam Conrad)
Differential Revision: https://reviews.llvm.org/D41643
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/combine-and.ll
llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll
llvm/trunk/test/CodeGen/X86/psubus.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=321969&r1=321968&r2=321969&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Sun Jan 7 11:09:40 2018
@@ -1220,6 +1220,12 @@ bool TargetLowering::SimplifyDemandedBit
Sign, ShAmt));
}
}
+ // If this is a bitcast, let computeKnownBits handle it. Only do this on a
+ // recursive call where Known may be useful to the caller.
+ if (Depth > 0) {
+ TLO.DAG.computeKnownBits(Op, Known, Depth);
+ return false;
+ }
break;
case ISD::ADD:
case ISD::MUL:
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=321969&r1=321968&r2=321969&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Sun Jan 7 11:09:40 2018
@@ -1773,18 +1773,15 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512BW-NEXT: vpcmpleb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
-; AVX512BW-NEXT: kxorq %k0, %k0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rcx
-; AVX512BW-NEXT: orq %rax, %rcx
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
-; AVX512BW-NEXT: vpcmpleb %zmm0, %zmm1, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpleb %zmm0, %zmm1, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rdx
+; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: addq %rdi, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -1795,8 +1792,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT: subl $68, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT: subl $60, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: .cfi_offset %esi, -12
; AVX512F-32-NEXT: .cfi_offset %ebx, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
@@ -2344,10 +2341,6 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kxorq %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
@@ -2362,7 +2355,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: adcl %ebx, %edx
-; AVX512F-32-NEXT: addl $68, %esp
+; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: vzeroupper
@@ -2478,18 +2471,15 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512BW-NEXT: vpcmpleub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
; AVX512BW-NEXT: addq %rcx, %rax
-; AVX512BW-NEXT: kxorq %k0, %k0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rcx
-; AVX512BW-NEXT: orq %rax, %rcx
; AVX512BW-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
-; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rcx
; AVX512BW-NEXT: addq %rax, %rcx
+; AVX512BW-NEXT: vpcmpnltub %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovq %k0, %rdx
+; AVX512BW-NEXT: addq %rcx, %rdx
; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: addq %rdx, %rax
; AVX512BW-NEXT: addq %rdi, %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -2500,8 +2490,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT: subl $68, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT: subl $60, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
; AVX512F-32-NEXT: .cfi_offset %esi, -12
; AVX512F-32-NEXT: .cfi_offset %ebx, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
@@ -3049,10 +3039,6 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: kxorq %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: orl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: vpcmpneqb %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
@@ -3067,7 +3053,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: adcl %ebx, %edx
-; AVX512F-32-NEXT: addl $68, %esp
+; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %ebx
; AVX512F-32-NEXT: vzeroupper
@@ -3172,24 +3158,24 @@ define i32 @test_mask_cmp_w_512(<32 x i1
; AVX512BW-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
-; AVX512BW-NEXT: kxord %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %ecx
-; AVX512BW-NEXT: orl %eax, %ecx
; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
-; AVX512BW-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %edx
+; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: addl %edi, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_cmp_w_512:
; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %esi, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
@@ -3200,19 +3186,17 @@ define i32 @test_mask_cmp_w_512(<32 x i1
; AVX512F-32-NEXT: vpcmplew %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
-; AVX512F-32-NEXT: kxord %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %edx
-; AVX512F-32-NEXT: orl %eax, %edx
; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %edx, %eax
-; AVX512F-32-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %eax, %edx
+; AVX512F-32-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %esi
+; AVX512F-32-NEXT: addl %edx, %esi
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %edx, %eax
+; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
@@ -3315,24 +3299,24 @@ define i32 @test_mask_ucmp_w_512(<32 x i
; AVX512BW-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: addl %ecx, %eax
-; AVX512BW-NEXT: kxord %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %ecx
-; AVX512BW-NEXT: orl %eax, %ecx
; AVX512BW-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
-; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %ecx
; AVX512BW-NEXT: addl %eax, %ecx
+; AVX512BW-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512BW-NEXT: kmovd %k0, %edx
+; AVX512BW-NEXT: addl %ecx, %edx
; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: addl %edx, %eax
; AVX512BW-NEXT: addl %edi, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-32-LABEL: test_mask_ucmp_w_512:
; AVX512F-32: # %bb.0:
+; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
+; AVX512F-32-NEXT: .cfi_offset %esi, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 {%k1}
@@ -3343,19 +3327,17 @@ define i32 @test_mask_ucmp_w_512(<32 x i
; AVX512F-32-NEXT: vpcmpleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edx, %eax
-; AVX512F-32-NEXT: kxord %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %edx
-; AVX512F-32-NEXT: orl %eax, %edx
; AVX512F-32-NEXT: vpcmpneqw %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %edx, %eax
-; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %edx
; AVX512F-32-NEXT: addl %eax, %edx
+; AVX512F-32-NEXT: vpcmpnltuw %zmm1, %zmm0, %k0 {%k1}
+; AVX512F-32-NEXT: kmovd %k0, %esi
+; AVX512F-32-NEXT: addl %edx, %esi
; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0 {%k1}
; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %edx, %eax
+; AVX512F-32-NEXT: addl %esi, %eax
; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
Modified: llvm/trunk/test/CodeGen/X86/combine-and.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-and.ll?rev=321969&r1=321968&r2=321969&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-and.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-and.ll Sun Jan 7 11:09:40 2018
@@ -291,7 +291,6 @@ define <16 x i8> @PR34620(<16 x i8> %a0,
; CHECK: # %bb.0:
; CHECK-NEXT: psrlw $1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
; CHECK-NEXT: paddb %xmm1, %xmm0
; CHECK-NEXT: retq
%1 = lshr <16 x i8> %a0, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
Modified: llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll?rev=321969&r1=321968&r2=321969&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-fcopysign.ll Sun Jan 7 11:09:40 2018
@@ -10,20 +10,13 @@
define <4 x float> @combine_vec_fcopysign_pos_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant0:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [2.000000e+00,2.000000e+00,2.000000e+00,2.000000e+00]
-; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant0:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 2.0, float 2.0, float 2.0, float 2.0>)
ret <4 x float> %1
@@ -32,19 +25,13 @@ define <4 x float> @combine_vec_fcopysig
define <4 x float> @combine_vec_fcopysign_pos_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_pos_constant1:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [0.000000e+00,2.000000e+00,4.000000e+00,8.000000e+00]
-; SSE-NEXT: andps {{.*}}(%rip), %xmm1
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_pos_constant1:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
-; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float 0.0, float 2.0, float 4.0, float 8.0>)
ret <4 x float> %1
@@ -70,19 +57,12 @@ define <4 x float> @combine_vec_fcopysig
define <4 x float> @combine_vec_fcopysign_neg_constant0(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant0:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [-2.000000e+00,-2.000000e+00,-2.000000e+00,-2.000000e+00]
-; SSE-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: orps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant0:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX-NEXT: vandps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -2.0, float -2.0, float -2.0, float -2.0>)
@@ -92,18 +72,12 @@ define <4 x float> @combine_vec_fcopysig
define <4 x float> @combine_vec_fcopysign_neg_constant1(<4 x float> %x) {
; SSE-LABEL: combine_vec_fcopysign_neg_constant1:
; SSE: # %bb.0:
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [-0.000000e+00,-2.000000e+00,-4.000000e+00,-8.000000e+00]
-; SSE-NEXT: andps {{.*}}(%rip), %xmm1
-; SSE-NEXT: andps {{.*}}(%rip), %xmm0
-; SSE-NEXT: orps %xmm1, %xmm0
+; SSE-NEXT: orps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_neg_constant1:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX-NEXT: vandps {{.*}}(%rip), %xmm1, %xmm1
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.copysign.v4f32(<4 x float> %x, <4 x float> <float -0.0, float -2.0, float -4.0, float -8.0>)
@@ -113,15 +87,12 @@ define <4 x float> @combine_vec_fcopysig
define <4 x float> @combine_vec_fcopysign_fneg_fabs_sgn(<4 x float> %x, <4 x float> %y) {
; SSE-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; SSE: # %bb.0:
-; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: orps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: combine_vec_fcopysign_fneg_fabs_sgn:
; AVX: # %bb.0:
; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm1
-; AVX-NEXT: vbroadcastss {{.*}}(%rip), %xmm2
-; AVX-NEXT: vandps %xmm2, %xmm0, %xmm0
; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
%1 = call <4 x float> @llvm.fabs.v4f32(<4 x float> %y)
Modified: llvm/trunk/test/CodeGen/X86/psubus.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/psubus.ll?rev=321969&r1=321968&r2=321969&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/psubus.ll (original)
+++ llvm/trunk/test/CodeGen/X86/psubus.ll Sun Jan 7 11:09:40 2018
@@ -466,11 +466,11 @@ define <8 x i16> @test13(<8 x i16> %x, <
; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm2, %xmm6
; SSE2-NEXT: pxor %xmm4, %xmm6
-; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm3, %xmm4
+; SSE2-NEXT: por %xmm3, %xmm4
; SSE2-NEXT: pcmpgtd %xmm4, %xmm2
; SSE2-NEXT: packssdw %xmm6, %xmm2
; SSE2-NEXT: psubd %xmm1, %xmm3
@@ -494,11 +494,11 @@ define <8 x i16> @test13(<8 x i16> %x, <
; SSSE3-NEXT: psubd %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm2, %xmm6
; SSSE3-NEXT: pxor %xmm3, %xmm6
-; SSSE3-NEXT: pxor %xmm3, %xmm5
+; SSSE3-NEXT: por %xmm3, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm6
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: pxor %xmm3, %xmm2
-; SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSSE3-NEXT: por %xmm4, %xmm3
; SSSE3-NEXT: pcmpgtd %xmm3, %xmm2
; SSSE3-NEXT: packssdw %xmm6, %xmm2
; SSSE3-NEXT: psubd %xmm1, %xmm4
@@ -520,11 +520,11 @@ define <8 x i16> @test13(<8 x i16> %x, <
; SSE41-NEXT: psubd %xmm1, %xmm4
; SSE41-NEXT: movdqa %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm5, %xmm0
-; SSE41-NEXT: pxor %xmm5, %xmm6
+; SSE41-NEXT: por %xmm5, %xmm6
; SSE41-NEXT: pcmpgtd %xmm6, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm5, %xmm1
-; SSE41-NEXT: pxor %xmm3, %xmm5
+; SSE41-NEXT: por %xmm3, %xmm5
; SSE41-NEXT: pcmpgtd %xmm5, %xmm1
; SSE41-NEXT: packssdw %xmm1, %xmm0
; SSE41-NEXT: psubd %xmm2, %xmm3
@@ -541,12 +541,12 @@ define <8 x i16> @test13(<8 x i16> %x, <
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm4
+; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm4
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm3
+; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubd %xmm5, %xmm2, %xmm2
@@ -564,7 +564,7 @@ define <8 x i16> @test13(<8 x i16> %x, <
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtd %ymm2, %ymm3, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
@@ -610,26 +610,26 @@ define <16 x i8> @test14(<16 x i8> %x, <
; SSE2-NEXT: movdqa %xmm4, %xmm9
; SSE2-NEXT: pxor %xmm0, %xmm9
; SSE2-NEXT: psubd %xmm5, %xmm4
-; SSE2-NEXT: pxor %xmm0, %xmm5
+; SSE2-NEXT: por %xmm0, %xmm5
; SSE2-NEXT: pcmpgtd %xmm9, %xmm5
; SSE2-NEXT: movdqa {{.*#+}} xmm9 = [255,255,255,255]
; SSE2-NEXT: pand %xmm9, %xmm5
; SSE2-NEXT: movdqa %xmm3, %xmm7
; SSE2-NEXT: pxor %xmm0, %xmm7
; SSE2-NEXT: psubd %xmm10, %xmm3
-; SSE2-NEXT: pxor %xmm0, %xmm10
+; SSE2-NEXT: por %xmm0, %xmm10
; SSE2-NEXT: pcmpgtd %xmm7, %xmm10
; SSE2-NEXT: pand %xmm9, %xmm10
; SSE2-NEXT: packuswb %xmm5, %xmm10
; SSE2-NEXT: movdqa %xmm2, %xmm5
; SSE2-NEXT: pxor %xmm0, %xmm5
; SSE2-NEXT: psubd %xmm6, %xmm2
-; SSE2-NEXT: pxor %xmm0, %xmm6
+; SSE2-NEXT: por %xmm0, %xmm6
; SSE2-NEXT: pcmpgtd %xmm5, %xmm6
; SSE2-NEXT: pand %xmm9, %xmm6
; SSE2-NEXT: movdqa %xmm1, %xmm5
; SSE2-NEXT: pxor %xmm0, %xmm5
-; SSE2-NEXT: pxor %xmm8, %xmm0
+; SSE2-NEXT: por %xmm8, %xmm0
; SSE2-NEXT: pcmpgtd %xmm5, %xmm0
; SSE2-NEXT: pand %xmm9, %xmm0
; SSE2-NEXT: packuswb %xmm6, %xmm0
@@ -662,27 +662,27 @@ define <16 x i8> @test14(<16 x i8> %x, <
; SSSE3-NEXT: movdqa %xmm2, %xmm9
; SSSE3-NEXT: pxor %xmm0, %xmm9
; SSSE3-NEXT: psubd %xmm5, %xmm2
-; SSSE3-NEXT: pxor %xmm0, %xmm5
+; SSSE3-NEXT: por %xmm0, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm9, %xmm5
; SSSE3-NEXT: movdqa {{.*#+}} xmm9 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm9, %xmm5
; SSSE3-NEXT: movdqa %xmm1, %xmm6
; SSSE3-NEXT: pxor %xmm0, %xmm6
; SSSE3-NEXT: psubd %xmm10, %xmm1
-; SSSE3-NEXT: pxor %xmm0, %xmm10
+; SSSE3-NEXT: por %xmm0, %xmm10
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm10
; SSSE3-NEXT: pshufb %xmm9, %xmm10
; SSSE3-NEXT: punpckldq {{.*#+}} xmm10 = xmm10[0],xmm5[0],xmm10[1],xmm5[1]
; SSSE3-NEXT: movdqa %xmm4, %xmm5
; SSSE3-NEXT: pxor %xmm0, %xmm5
; SSSE3-NEXT: psubd %xmm7, %xmm4
-; SSSE3-NEXT: pxor %xmm0, %xmm7
+; SSSE3-NEXT: por %xmm0, %xmm7
; SSSE3-NEXT: pcmpgtd %xmm5, %xmm7
; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSSE3-NEXT: pshufb %xmm5, %xmm7
; SSSE3-NEXT: movdqa %xmm3, %xmm6
; SSSE3-NEXT: pxor %xmm0, %xmm6
-; SSSE3-NEXT: pxor %xmm8, %xmm0
+; SSSE3-NEXT: por %xmm8, %xmm0
; SSSE3-NEXT: pcmpgtd %xmm6, %xmm0
; SSSE3-NEXT: pshufb %xmm5, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1]
@@ -713,27 +713,27 @@ define <16 x i8> @test14(<16 x i8> %x, <
; SSE41-NEXT: movdqa %xmm4, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
; SSE41-NEXT: psubd %xmm6, %xmm4
-; SSE41-NEXT: pxor %xmm5, %xmm6
+; SSE41-NEXT: por %xmm5, %xmm6
; SSE41-NEXT: pcmpgtd %xmm7, %xmm6
; SSE41-NEXT: movdqa {{.*#+}} xmm10 = <u,u,u,u,0,4,8,12,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm10, %xmm6
; SSE41-NEXT: movdqa %xmm3, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
; SSE41-NEXT: psubd %xmm9, %xmm3
-; SSE41-NEXT: pxor %xmm5, %xmm9
+; SSE41-NEXT: por %xmm5, %xmm9
; SSE41-NEXT: pcmpgtd %xmm7, %xmm9
; SSE41-NEXT: pshufb %xmm10, %xmm9
; SSE41-NEXT: punpckldq {{.*#+}} xmm9 = xmm9[0],xmm6[0],xmm9[1],xmm6[1]
; SSE41-NEXT: movdqa %xmm1, %xmm6
; SSE41-NEXT: pxor %xmm5, %xmm6
; SSE41-NEXT: psubd %xmm0, %xmm1
-; SSE41-NEXT: pxor %xmm5, %xmm0
+; SSE41-NEXT: por %xmm5, %xmm0
; SSE41-NEXT: pcmpgtd %xmm6, %xmm0
; SSE41-NEXT: movdqa {{.*#+}} xmm6 = <0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u>
; SSE41-NEXT: pshufb %xmm6, %xmm0
; SSE41-NEXT: movdqa %xmm2, %xmm7
; SSE41-NEXT: pxor %xmm5, %xmm7
-; SSE41-NEXT: pxor %xmm8, %xmm5
+; SSE41-NEXT: por %xmm8, %xmm5
; SSE41-NEXT: pcmpgtd %xmm7, %xmm5
; SSE41-NEXT: pshufb %xmm6, %xmm5
; SSE41-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1]
@@ -762,18 +762,18 @@ define <16 x i8> @test14(<16 x i8> %x, <
; AVX1-NEXT: vmovdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
; AVX1-NEXT: vpxor %xmm6, %xmm7, %xmm3
-; AVX1-NEXT: vpxor %xmm6, %xmm0, %xmm4
+; AVX1-NEXT: vpor %xmm6, %xmm0, %xmm4
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpxor %xmm6, %xmm2, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm10, %xmm5
+; AVX1-NEXT: vpor %xmm6, %xmm10, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm11
; AVX1-NEXT: vpxor %xmm6, %xmm1, %xmm4
-; AVX1-NEXT: vpxor %xmm6, %xmm9, %xmm5
+; AVX1-NEXT: vpor %xmm6, %xmm9, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpxor %xmm6, %xmm5, %xmm3
-; AVX1-NEXT: vpxor %xmm6, %xmm8, %xmm6
+; AVX1-NEXT: vpor %xmm6, %xmm8, %xmm6
; AVX1-NEXT: vpcmpgtd %xmm3, %xmm6, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpacksswb %xmm11, %xmm3, %xmm3
@@ -800,12 +800,12 @@ define <16 x i8> @test14(<16 x i8> %x, <
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm5
-; AVX2-NEXT: vpxor %ymm4, %ymm0, %ymm6
+; AVX2-NEXT: vpor %ymm4, %ymm0, %ymm6
; AVX2-NEXT: vpcmpgtd %ymm5, %ymm6, %ymm5
; AVX2-NEXT: vextracti128 $1, %ymm5, %xmm6
; AVX2-NEXT: vpackssdw %xmm6, %xmm5, %xmm5
; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm6
-; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm4
+; AVX2-NEXT: vpor %ymm4, %ymm3, %ymm4
; AVX2-NEXT: vpcmpgtd %ymm6, %ymm4, %ymm4
; AVX2-NEXT: vextracti128 $1, %ymm4, %xmm6
; AVX2-NEXT: vpackssdw %xmm6, %xmm4, %xmm4
@@ -853,11 +853,11 @@ define <8 x i16> @test15(<8 x i16> %x, <
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psubd %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: packssdw %xmm5, %xmm4
; SSE2-NEXT: psubd %xmm1, %xmm0
@@ -879,11 +879,11 @@ define <8 x i16> @test15(<8 x i16> %x, <
; SSSE3-NEXT: movdqa %xmm0, %xmm5
; SSSE3-NEXT: psubd %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm4, %xmm5
+; SSSE3-NEXT: por %xmm4, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: pxor %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm3, %xmm4
+; SSSE3-NEXT: por %xmm3, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
; SSSE3-NEXT: packssdw %xmm5, %xmm4
; SSSE3-NEXT: psubd %xmm1, %xmm3
@@ -904,11 +904,11 @@ define <8 x i16> @test15(<8 x i16> %x, <
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm4, %xmm1
-; SSE41-NEXT: pxor %xmm4, %xmm5
+; SSE41-NEXT: por %xmm4, %xmm5
; SSE41-NEXT: pcmpgtd %xmm1, %xmm5
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm4, %xmm1
-; SSE41-NEXT: pxor %xmm3, %xmm4
+; SSE41-NEXT: por %xmm3, %xmm4
; SSE41-NEXT: pcmpgtd %xmm1, %xmm4
; SSE41-NEXT: packssdw %xmm4, %xmm5
; SSE41-NEXT: psubd %xmm2, %xmm3
@@ -926,11 +926,11 @@ define <8 x i16> @test15(<8 x i16> %x, <
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm3
+; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubd %xmm5, %xmm2, %xmm2
@@ -948,7 +948,7 @@ define <8 x i16> @test15(<8 x i16> %x, <
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
@@ -987,11 +987,11 @@ define <8 x i16> @test16(<8 x i16> %x, <
; SSE2-NEXT: movdqa %xmm3, %xmm5
; SSE2-NEXT: psubd %xmm2, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm4, %xmm5
+; SSE2-NEXT: por %xmm4, %xmm5
; SSE2-NEXT: pcmpgtd %xmm2, %xmm5
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
-; SSE2-NEXT: pxor %xmm0, %xmm4
+; SSE2-NEXT: por %xmm0, %xmm4
; SSE2-NEXT: pcmpgtd %xmm2, %xmm4
; SSE2-NEXT: packssdw %xmm5, %xmm4
; SSE2-NEXT: psubd %xmm1, %xmm0
@@ -1013,11 +1013,11 @@ define <8 x i16> @test16(<8 x i16> %x, <
; SSSE3-NEXT: movdqa %xmm0, %xmm5
; SSSE3-NEXT: psubd %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm4, %xmm5
+; SSSE3-NEXT: por %xmm4, %xmm5
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm5
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: pxor %xmm4, %xmm2
-; SSSE3-NEXT: pxor %xmm3, %xmm4
+; SSSE3-NEXT: por %xmm3, %xmm4
; SSSE3-NEXT: pcmpgtd %xmm2, %xmm4
; SSSE3-NEXT: packssdw %xmm5, %xmm4
; SSSE3-NEXT: psubd %xmm1, %xmm3
@@ -1038,11 +1038,11 @@ define <8 x i16> @test16(<8 x i16> %x, <
; SSE41-NEXT: movdqa %xmm0, %xmm5
; SSE41-NEXT: psubd %xmm1, %xmm0
; SSE41-NEXT: pxor %xmm4, %xmm1
-; SSE41-NEXT: pxor %xmm4, %xmm5
+; SSE41-NEXT: por %xmm4, %xmm5
; SSE41-NEXT: pcmpgtd %xmm1, %xmm5
; SSE41-NEXT: movdqa %xmm2, %xmm1
; SSE41-NEXT: pxor %xmm4, %xmm1
-; SSE41-NEXT: pxor %xmm3, %xmm4
+; SSE41-NEXT: por %xmm3, %xmm4
; SSE41-NEXT: pcmpgtd %xmm1, %xmm4
; SSE41-NEXT: packssdw %xmm4, %xmm5
; SSE41-NEXT: psubd %xmm2, %xmm3
@@ -1060,11 +1060,11 @@ define <8 x i16> @test16(<8 x i16> %x, <
; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
; AVX1-NEXT: vpxor %xmm3, %xmm1, %xmm4
-; AVX1-NEXT: vpxor %xmm3, %xmm0, %xmm5
+; AVX1-NEXT: vpor %xmm3, %xmm0, %xmm5
; AVX1-NEXT: vpcmpgtd %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpxor %xmm3, %xmm5, %xmm6
-; AVX1-NEXT: vpxor %xmm3, %xmm2, %xmm3
+; AVX1-NEXT: vpor %xmm3, %xmm2, %xmm3
; AVX1-NEXT: vpcmpgtd %xmm6, %xmm3, %xmm3
; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpsubd %xmm5, %xmm2, %xmm2
@@ -1082,7 +1082,7 @@ define <8 x i16> @test16(<8 x i16> %x, <
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
; AVX2-NEXT: vpxor %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpor %ymm2, %ymm0, %ymm2
; AVX2-NEXT: vpcmpgtd %ymm3, %ymm2, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX2-NEXT: vpackssdw %xmm3, %xmm2, %xmm2
More information about the llvm-commits
mailing list