[llvm] r324124 - [X86] Add tests for missed opportunities to use ptest for all ones comparison.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 2 11:34:10 PST 2018
Author: ctopper
Date: Fri Feb 2 11:34:10 2018
New Revision: 324124
URL: http://llvm.org/viewvc/llvm-project?rev=324124&view=rev
Log:
[X86] Add tests for missed opportunities to use ptest for all ones comparison.
Also rename the test from pr12312.ll to ptest.ll so it's more recognizable.
Added:
llvm/trunk/test/CodeGen/X86/ptest.ll
Removed:
llvm/trunk/test/CodeGen/X86/pr12312.ll
Removed: llvm/trunk/test/CodeGen/X86/pr12312.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr12312.ll?rev=324123&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr12312.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr12312.ll (removed)
@@ -1,243 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix=SSE41
-; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix=AVX
-
-define i32 @veccond128(<4 x i32> %input) {
-; SSE41-LABEL: veccond128:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: ptest %xmm0, %xmm0
-; SSE41-NEXT: je .LBB0_2
-; SSE41-NEXT: # %bb.1: # %if-true-block
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: retq
-; SSE41-NEXT: .LBB0_2: # %endif-block
-; SSE41-NEXT: movl $1, %eax
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: veccond128:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vptest %xmm0, %xmm0
-; AVX-NEXT: je .LBB0_2
-; AVX-NEXT: # %bb.1: # %if-true-block
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: retq
-; AVX-NEXT: .LBB0_2: # %endif-block
-; AVX-NEXT: movl $1, %eax
-; AVX-NEXT: retq
-entry:
- %0 = bitcast <4 x i32> %input to i128
- %1 = icmp ne i128 %0, 0
- br i1 %1, label %if-true-block, label %endif-block
-if-true-block:
- ret i32 0
-endif-block:
- ret i32 1
-}
-
-define i32 @veccond256(<8 x i32> %input) {
-; SSE41-LABEL: veccond256:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: ptest %xmm0, %xmm0
-; SSE41-NEXT: je .LBB1_2
-; SSE41-NEXT: # %bb.1: # %if-true-block
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: retq
-; SSE41-NEXT: .LBB1_2: # %endif-block
-; SSE41-NEXT: movl $1, %eax
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: veccond256:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vptest %ymm0, %ymm0
-; AVX-NEXT: je .LBB1_2
-; AVX-NEXT: # %bb.1: # %if-true-block
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
-; AVX-NEXT: .LBB1_2: # %endif-block
-; AVX-NEXT: movl $1, %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
-entry:
- %0 = bitcast <8 x i32> %input to i256
- %1 = icmp ne i256 %0, 0
- br i1 %1, label %if-true-block, label %endif-block
-if-true-block:
- ret i32 0
-endif-block:
- ret i32 1
-}
-
-define i32 @veccond512(<16 x i32> %input) {
-; SSE41-LABEL: veccond512:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: por %xmm3, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: ptest %xmm1, %xmm1
-; SSE41-NEXT: je .LBB2_2
-; SSE41-NEXT: # %bb.1: # %if-true-block
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: retq
-; SSE41-NEXT: .LBB2_2: # %endif-block
-; SSE41-NEXT: movl $1, %eax
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: veccond512:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vptest %ymm0, %ymm0
-; AVX-NEXT: je .LBB2_2
-; AVX-NEXT: # %bb.1: # %if-true-block
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
-; AVX-NEXT: .LBB2_2: # %endif-block
-; AVX-NEXT: movl $1, %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
-entry:
- %0 = bitcast <16 x i32> %input to i512
- %1 = icmp ne i512 %0, 0
- br i1 %1, label %if-true-block, label %endif-block
-if-true-block:
- ret i32 0
-endif-block:
- ret i32 1
-}
-
-define i32 @vectest128(<4 x i32> %input) {
-; SSE41-LABEL: vectest128:
-; SSE41: # %bb.0:
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: ptest %xmm0, %xmm0
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: vectest128:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vptest %xmm0, %xmm0
-; AVX-NEXT: setne %al
-; AVX-NEXT: retq
- %t0 = bitcast <4 x i32> %input to i128
- %t1 = icmp ne i128 %t0, 0
- %t2 = zext i1 %t1 to i32
- ret i32 %t2
-}
-
-define i32 @vectest256(<8 x i32> %input) {
-; SSE41-LABEL: vectest256:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: ptest %xmm0, %xmm0
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: vectest256:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vptest %ymm0, %ymm0
-; AVX-NEXT: setne %al
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
- %t0 = bitcast <8 x i32> %input to i256
- %t1 = icmp ne i256 %t0, 0
- %t2 = zext i1 %t1 to i32
- ret i32 %t2
-}
-
-define i32 @vectest512(<16 x i32> %input) {
-; SSE41-LABEL: vectest512:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm3, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: xorl %eax, %eax
-; SSE41-NEXT: ptest %xmm1, %xmm1
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: vectest512:
-; AVX: # %bb.0:
-; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vptest %ymm0, %ymm0
-; AVX-NEXT: setne %al
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
- %t0 = bitcast <16 x i32> %input to i512
- %t1 = icmp ne i512 %t0, 0
- %t2 = zext i1 %t1 to i32
- ret i32 %t2
-}
-
-define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
-; SSE41-LABEL: vecsel128:
-; SSE41: # %bb.0:
-; SSE41-NEXT: ptest %xmm0, %xmm0
-; SSE41-NEXT: cmovel %esi, %edi
-; SSE41-NEXT: movl %edi, %eax
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: vecsel128:
-; AVX: # %bb.0:
-; AVX-NEXT: vptest %xmm0, %xmm0
-; AVX-NEXT: cmovel %esi, %edi
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: retq
- %t0 = bitcast <4 x i32> %input to i128
- %t1 = icmp ne i128 %t0, 0
- %t2 = select i1 %t1, i32 %a, i32 %b
- ret i32 %t2
-}
-
-define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
-; SSE41-LABEL: vecsel256:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm1, %xmm0
-; SSE41-NEXT: ptest %xmm0, %xmm0
-; SSE41-NEXT: cmovel %esi, %edi
-; SSE41-NEXT: movl %edi, %eax
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: vecsel256:
-; AVX: # %bb.0:
-; AVX-NEXT: vptest %ymm0, %ymm0
-; AVX-NEXT: cmovel %esi, %edi
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
- %t0 = bitcast <8 x i32> %input to i256
- %t1 = icmp ne i256 %t0, 0
- %t2 = select i1 %t1, i32 %a, i32 %b
- ret i32 %t2
-}
-
-define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
-; SSE41-LABEL: vecsel512:
-; SSE41: # %bb.0:
-; SSE41-NEXT: por %xmm3, %xmm1
-; SSE41-NEXT: por %xmm2, %xmm1
-; SSE41-NEXT: por %xmm0, %xmm1
-; SSE41-NEXT: ptest %xmm1, %xmm1
-; SSE41-NEXT: cmovel %esi, %edi
-; SSE41-NEXT: movl %edi, %eax
-; SSE41-NEXT: retq
-;
-; AVX-LABEL: vecsel512:
-; AVX: # %bb.0:
-; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
-; AVX-NEXT: vptest %ymm0, %ymm0
-; AVX-NEXT: cmovel %esi, %edi
-; AVX-NEXT: movl %edi, %eax
-; AVX-NEXT: vzeroupper
-; AVX-NEXT: retq
- %t0 = bitcast <16 x i32> %input to i512
- %t1 = icmp ne i512 %t0, 0
- %t2 = select i1 %t1, i32 %a, i32 %b
- ret i32 %t2
-}
-
Added: llvm/trunk/test/CodeGen/X86/ptest.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/ptest.ll?rev=324124&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/ptest.ll (added)
+++ llvm/trunk/test/CodeGen/X86/ptest.ll Fri Feb 2 11:34:10 2018
@@ -0,0 +1,511 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse4.1,-avx < %s | FileCheck %s --check-prefix=SSE41
+; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx,-avx2 < %s | FileCheck %s --check-prefix=AVX
+
+define i32 @veccond128(<4 x i32> %input) {
+; SSE41-LABEL: veccond128:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: je .LBB0_2
+; SSE41-NEXT: # %bb.1: # %if-true-block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB0_2: # %endif-block
+; SSE41-NEXT: movl $1, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: veccond128:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: je .LBB0_2
+; AVX-NEXT: # %bb.1: # %if-true-block
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB0_2: # %endif-block
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i32> %input to i128
+ %1 = icmp ne i128 %0, 0
+ br i1 %1, label %if-true-block, label %endif-block
+if-true-block:
+ ret i32 0
+endif-block:
+ ret i32 1
+}
+
+define i32 @veccond256(<8 x i32> %input) {
+; SSE41-LABEL: veccond256:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: je .LBB1_2
+; SSE41-NEXT: # %bb.1: # %if-true-block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB1_2: # %endif-block
+; SSE41-NEXT: movl $1, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: veccond256:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: je .LBB1_2
+; AVX-NEXT: # %bb.1: # %if-true-block
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB1_2: # %endif-block
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %0 = bitcast <8 x i32> %input to i256
+ %1 = icmp ne i256 %0, 0
+ br i1 %1, label %if-true-block, label %endif-block
+if-true-block:
+ ret i32 0
+endif-block:
+ ret i32 1
+}
+
+define i32 @veccond512(<16 x i32> %input) {
+; SSE41-LABEL: veccond512:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: por %xmm3, %xmm1
+; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: ptest %xmm1, %xmm1
+; SSE41-NEXT: je .LBB2_2
+; SSE41-NEXT: # %bb.1: # %if-true-block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB2_2: # %endif-block
+; SSE41-NEXT: movl $1, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: veccond512:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: je .LBB2_2
+; AVX-NEXT: # %bb.1: # %if-true-block
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB2_2: # %endif-block
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %0 = bitcast <16 x i32> %input to i512
+ %1 = icmp ne i512 %0, 0
+ br i1 %1, label %if-true-block, label %endif-block
+if-true-block:
+ ret i32 0
+endif-block:
+ ret i32 1
+}
+
+define i32 @vectest128(<4 x i32> %input) {
+; SSE41-LABEL: vectest128:
+; SSE41: # %bb.0:
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: setne %al
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vectest128:
+; AVX: # %bb.0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: setne %al
+; AVX-NEXT: retq
+ %t0 = bitcast <4 x i32> %input to i128
+ %t1 = icmp ne i128 %t0, 0
+ %t2 = zext i1 %t1 to i32
+ ret i32 %t2
+}
+
+define i32 @vectest256(<8 x i32> %input) {
+; SSE41-LABEL: vectest256:
+; SSE41: # %bb.0:
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: setne %al
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vectest256:
+; AVX: # %bb.0:
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: setne %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %t0 = bitcast <8 x i32> %input to i256
+ %t1 = icmp ne i256 %t0, 0
+ %t2 = zext i1 %t1 to i32
+ ret i32 %t2
+}
+
+define i32 @vectest512(<16 x i32> %input) {
+; SSE41-LABEL: vectest512:
+; SSE41: # %bb.0:
+; SSE41-NEXT: por %xmm3, %xmm1
+; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: ptest %xmm1, %xmm1
+; SSE41-NEXT: setne %al
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vectest512:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: setne %al
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %t0 = bitcast <16 x i32> %input to i512
+ %t1 = icmp ne i512 %t0, 0
+ %t2 = zext i1 %t1 to i32
+ ret i32 %t2
+}
+
+define i32 @vecsel128(<4 x i32> %input, i32 %a, i32 %b) {
+; SSE41-LABEL: vecsel128:
+; SSE41: # %bb.0:
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: cmovel %esi, %edi
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vecsel128:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %xmm0, %xmm0
+; AVX-NEXT: cmovel %esi, %edi
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: retq
+ %t0 = bitcast <4 x i32> %input to i128
+ %t1 = icmp ne i128 %t0, 0
+ %t2 = select i1 %t1, i32 %a, i32 %b
+ ret i32 %t2
+}
+
+define i32 @vecsel256(<8 x i32> %input, i32 %a, i32 %b) {
+; SSE41-LABEL: vecsel256:
+; SSE41: # %bb.0:
+; SSE41-NEXT: por %xmm1, %xmm0
+; SSE41-NEXT: ptest %xmm0, %xmm0
+; SSE41-NEXT: cmovel %esi, %edi
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vecsel256:
+; AVX: # %bb.0:
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: cmovel %esi, %edi
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %t0 = bitcast <8 x i32> %input to i256
+ %t1 = icmp ne i256 %t0, 0
+ %t2 = select i1 %t1, i32 %a, i32 %b
+ ret i32 %t2
+}
+
+define i32 @vecsel512(<16 x i32> %input, i32 %a, i32 %b) {
+; SSE41-LABEL: vecsel512:
+; SSE41: # %bb.0:
+; SSE41-NEXT: por %xmm3, %xmm1
+; SSE41-NEXT: por %xmm2, %xmm1
+; SSE41-NEXT: por %xmm0, %xmm1
+; SSE41-NEXT: ptest %xmm1, %xmm1
+; SSE41-NEXT: cmovel %esi, %edi
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vecsel512:
+; AVX: # %bb.0:
+; AVX-NEXT: vorps %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vptest %ymm0, %ymm0
+; AVX-NEXT: cmovel %esi, %edi
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %t0 = bitcast <16 x i32> %input to i512
+ %t1 = icmp ne i512 %t0, 0
+ %t2 = select i1 %t1, i32 %a, i32 %b
+ ret i32 %t2
+}
+
+define i32 @veccond128_ones(<4 x i32> %input) {
+; SSE41-LABEL: veccond128_ones:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT: pmovmskb %xmm1, %eax
+; SSE41-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE41-NEXT: je .LBB9_2
+; SSE41-NEXT: # %bb.1: # %if-true-block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB9_2: # %endif-block
+; SSE41-NEXT: movl $1, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: veccond128_ones:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX-NEXT: je .LBB9_2
+; AVX-NEXT: # %bb.1: # %if-true-block
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB9_2: # %endif-block
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: retq
+entry:
+ %0 = bitcast <4 x i32> %input to i128
+ %1 = icmp ne i128 %0, -1
+ br i1 %1, label %if-true-block, label %endif-block
+if-true-block:
+ ret i32 0
+endif-block:
+ ret i32 1
+}
+
+define i32 @veccond256_ones(<8 x i32> %input) {
+; SSE41-LABEL: veccond256_ones:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movq %xmm1, %rax
+; SSE41-NEXT: movq %xmm0, %rcx
+; SSE41-NEXT: pextrq $1, %xmm1, %rdx
+; SSE41-NEXT: pextrq $1, %xmm0, %rsi
+; SSE41-NEXT: andq %rdx, %rsi
+; SSE41-NEXT: andq %rax, %rsi
+; SSE41-NEXT: andq %rcx, %rsi
+; SSE41-NEXT: cmpq $-1, %rsi
+; SSE41-NEXT: je .LBB10_2
+; SSE41-NEXT: # %bb.1: # %if-true-block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB10_2: # %endif-block
+; SSE41-NEXT: movl $1, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: veccond256_ones:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovq %xmm1, %rax
+; AVX-NEXT: vmovq %xmm0, %rcx
+; AVX-NEXT: vpextrq $1, %xmm1, %rdx
+; AVX-NEXT: vpextrq $1, %xmm0, %rsi
+; AVX-NEXT: andq %rdx, %rsi
+; AVX-NEXT: andq %rax, %rsi
+; AVX-NEXT: andq %rcx, %rsi
+; AVX-NEXT: cmpq $-1, %rsi
+; AVX-NEXT: je .LBB10_2
+; AVX-NEXT: # %bb.1: # %if-true-block
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB10_2: # %endif-block
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %0 = bitcast <8 x i32> %input to i256
+ %1 = icmp ne i256 %0, -1
+ br i1 %1, label %if-true-block, label %endif-block
+if-true-block:
+ ret i32 0
+endif-block:
+ ret i32 1
+}
+
+define i32 @veccond512_ones(<16 x i32> %input) {
+; SSE41-LABEL: veccond512_ones:
+; SSE41: # %bb.0: # %entry
+; SSE41-NEXT: movq %xmm2, %rax
+; SSE41-NEXT: movq %xmm0, %r8
+; SSE41-NEXT: movq %xmm3, %rdx
+; SSE41-NEXT: movq %xmm1, %rsi
+; SSE41-NEXT: andq %rdx, %rsi
+; SSE41-NEXT: andq %rax, %rsi
+; SSE41-NEXT: pextrq $1, %xmm2, %rax
+; SSE41-NEXT: pextrq $1, %xmm0, %rdx
+; SSE41-NEXT: pextrq $1, %xmm3, %rdi
+; SSE41-NEXT: pextrq $1, %xmm1, %rcx
+; SSE41-NEXT: andq %rdi, %rcx
+; SSE41-NEXT: andq %rax, %rcx
+; SSE41-NEXT: andq %rdx, %rcx
+; SSE41-NEXT: andq %rsi, %rcx
+; SSE41-NEXT: andq %r8, %rcx
+; SSE41-NEXT: cmpq $-1, %rcx
+; SSE41-NEXT: je .LBB11_2
+; SSE41-NEXT: # %bb.1: # %if-true-block
+; SSE41-NEXT: xorl %eax, %eax
+; SSE41-NEXT: retq
+; SSE41-NEXT: .LBB11_2: # %endif-block
+; SSE41-NEXT: movl $1, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: veccond512_ones:
+; AVX: # %bb.0: # %entry
+; AVX-NEXT: vmovq %xmm1, %rax
+; AVX-NEXT: vmovq %xmm0, %r8
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vmovq %xmm2, %rdx
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX-NEXT: vmovq %xmm3, %rsi
+; AVX-NEXT: andq %rdx, %rsi
+; AVX-NEXT: andq %rax, %rsi
+; AVX-NEXT: vpextrq $1, %xmm1, %rax
+; AVX-NEXT: vpextrq $1, %xmm0, %rdx
+; AVX-NEXT: vpextrq $1, %xmm2, %rdi
+; AVX-NEXT: vpextrq $1, %xmm3, %rcx
+; AVX-NEXT: andq %rdi, %rcx
+; AVX-NEXT: andq %rax, %rcx
+; AVX-NEXT: andq %rdx, %rcx
+; AVX-NEXT: andq %rsi, %rcx
+; AVX-NEXT: andq %r8, %rcx
+; AVX-NEXT: cmpq $-1, %rcx
+; AVX-NEXT: je .LBB11_2
+; AVX-NEXT: # %bb.1: # %if-true-block
+; AVX-NEXT: xorl %eax, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+; AVX-NEXT: .LBB11_2: # %endif-block
+; AVX-NEXT: movl $1, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+entry:
+ %0 = bitcast <16 x i32> %input to i512
+ %1 = icmp ne i512 %0, -1
+ br i1 %1, label %if-true-block, label %endif-block
+if-true-block:
+ ret i32 0
+endif-block:
+ ret i32 1
+}
+
+define i32 @vecsel128_ones(<4 x i32> %input, i32 %a, i32 %b) {
+; SSE41-LABEL: vecsel128_ones:
+; SSE41: # %bb.0:
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE41-NEXT: pcmpeqb %xmm0, %xmm1
+; SSE41-NEXT: pmovmskb %xmm1, %eax
+; SSE41-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; SSE41-NEXT: cmovel %esi, %edi
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vecsel128_ones:
+; AVX: # %bb.0:
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpmovmskb %xmm0, %eax
+; AVX-NEXT: cmpl $65535, %eax # imm = 0xFFFF
+; AVX-NEXT: cmovel %esi, %edi
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: retq
+ %t0 = bitcast <4 x i32> %input to i128
+ %t1 = icmp ne i128 %t0, -1
+ %t2 = select i1 %t1, i32 %a, i32 %b
+ ret i32 %t2
+}
+
+define i32 @vecsel256_ones(<8 x i32> %input, i32 %a, i32 %b) {
+; SSE41-LABEL: vecsel256_ones:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movq %xmm1, %rax
+; SSE41-NEXT: movq %xmm0, %r8
+; SSE41-NEXT: pextrq $1, %xmm1, %rdx
+; SSE41-NEXT: pextrq $1, %xmm0, %rcx
+; SSE41-NEXT: andq %rdx, %rcx
+; SSE41-NEXT: andq %rax, %rcx
+; SSE41-NEXT: andq %r8, %rcx
+; SSE41-NEXT: cmpq $-1, %rcx
+; SSE41-NEXT: cmovel %esi, %edi
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vecsel256_ones:
+; AVX: # %bb.0:
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm1
+; AVX-NEXT: vmovq %xmm1, %r8
+; AVX-NEXT: vmovq %xmm0, %rcx
+; AVX-NEXT: vpextrq $1, %xmm1, %rdx
+; AVX-NEXT: vpextrq $1, %xmm0, %rax
+; AVX-NEXT: andq %rdx, %rax
+; AVX-NEXT: andq %r8, %rax
+; AVX-NEXT: andq %rcx, %rax
+; AVX-NEXT: cmpq $-1, %rax
+; AVX-NEXT: cmovel %esi, %edi
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %t0 = bitcast <8 x i32> %input to i256
+ %t1 = icmp ne i256 %t0, -1
+ %t2 = select i1 %t1, i32 %a, i32 %b
+ ret i32 %t2
+}
+
+define i32 @vecsel512_ones(<16 x i32> %input, i32 %a, i32 %b) {
+; SSE41-LABEL: vecsel512_ones:
+; SSE41: # %bb.0:
+; SSE41-NEXT: movq %xmm2, %rax
+; SSE41-NEXT: movq %xmm0, %r8
+; SSE41-NEXT: movq %xmm3, %rdx
+; SSE41-NEXT: movq %xmm1, %rcx
+; SSE41-NEXT: andq %rdx, %rcx
+; SSE41-NEXT: andq %rax, %rcx
+; SSE41-NEXT: pextrq $1, %xmm2, %r10
+; SSE41-NEXT: pextrq $1, %xmm0, %r9
+; SSE41-NEXT: pextrq $1, %xmm3, %rdx
+; SSE41-NEXT: pextrq $1, %xmm1, %rax
+; SSE41-NEXT: andq %rdx, %rax
+; SSE41-NEXT: andq %r10, %rax
+; SSE41-NEXT: andq %r9, %rax
+; SSE41-NEXT: andq %rcx, %rax
+; SSE41-NEXT: andq %r8, %rax
+; SSE41-NEXT: cmpq $-1, %rax
+; SSE41-NEXT: cmovel %esi, %edi
+; SSE41-NEXT: movl %edi, %eax
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: vecsel512_ones:
+; AVX: # %bb.0:
+; AVX-NEXT: vmovq %xmm1, %rax
+; AVX-NEXT: vmovq %xmm0, %r8
+; AVX-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX-NEXT: vmovq %xmm2, %rdx
+; AVX-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX-NEXT: vmovq %xmm3, %rcx
+; AVX-NEXT: andq %rdx, %rcx
+; AVX-NEXT: andq %rax, %rcx
+; AVX-NEXT: vpextrq $1, %xmm1, %r10
+; AVX-NEXT: vpextrq $1, %xmm0, %r9
+; AVX-NEXT: vpextrq $1, %xmm2, %rdx
+; AVX-NEXT: vpextrq $1, %xmm3, %rax
+; AVX-NEXT: andq %rdx, %rax
+; AVX-NEXT: andq %r10, %rax
+; AVX-NEXT: andq %r9, %rax
+; AVX-NEXT: andq %rcx, %rax
+; AVX-NEXT: andq %r8, %rax
+; AVX-NEXT: cmpq $-1, %rax
+; AVX-NEXT: cmovel %esi, %edi
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+ %t0 = bitcast <16 x i32> %input to i512
+ %t1 = icmp ne i512 %t0, -1
+ %t2 = select i1 %t1, i32 %a, i32 %b
+ ret i32 %t2
+}
+
More information about the llvm-commits
mailing list