[llvm] 859c7e4 - [X86] MatchVectorAllEqualTest - support for sub-128-bit vector icmp(and/or(extract(X,0),extract(X,1)),-1/0) reduction patterns

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Sat Apr 1 08:37:59 PDT 2023


Author: Simon Pilgrim
Date: 2023-04-01T16:37:28+01:00
New Revision: 859c7e4b10edd3ef92b9e04b50708ae850a554a9

URL: https://github.com/llvm/llvm-project/commit/859c7e4b10edd3ef92b9e04b50708ae850a554a9
DIFF: https://github.com/llvm/llvm-project/commit/859c7e4b10edd3ef92b9e04b50708ae850a554a9.diff

LOG: [X86] MatchVectorAllEqualTest - support for sub-128-bit vector icmp(and/or(extract(X,0),extract(X,1)),-1/0) reduction patterns

LowerVectorAllEqual already has full support for this
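
For reference, this is the shape of pattern now handled for sub-128-bit
vectors: a minimal sketch (function name is hypothetical) of the all-ones
form on <2 x i32>, matching the test_v2i32 case in the diff below, which
now lowers to a single cmpq $-1, (%rdi):

  ; Illustrative only: icmp(and(extract(X,0),extract(X,1)),-1)
  define i1 @allones_v2i32(<2 x i32> %x) {
    %e0 = extractelement <2 x i32> %x, i32 0
    %e1 = extractelement <2 x i32> %x, i32 1
    %a = and i32 %e0, %e1
    %r = icmp eq i32 %a, -1
    ret i1 %r
  }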

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b62b49b8a7c9..246909bc62c7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24481,9 +24481,8 @@ static SDValue MatchVectorAllEqualTest(SDValue LHS, SDValue RHS,
                         [VT](SDValue V) { return VT == V.getValueType(); }) &&
            "Reduction source vector mismatch");
 
-    // Quit if less than 128-bits or not splittable to 128/256-bit vector.
-    if (VT.getSizeInBits() < 128 ||
-        !llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
+    // Quit if not splittable to scalar/128/256/512-bit vector.
+    if (!llvm::has_single_bit<uint32_t>(VT.getSizeInBits()))
       return SDValue();
 
     // If more than one full vector is evaluated, AND/OR them first before

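The relaxed guard above only requires the total vector width to be a power
of two, so 16/32/64-bit vectors now qualify alongside the 128/256/512-bit
ones. For completeness, a sketch (function name is hypothetical, generalized
to four lanes) of the complementary all-zeros form from the title, which
OR-reduces the lanes and compares against 0:

  ; Illustrative only: icmp(or(...),0) over all lanes of <4 x i8>
  define i1 @allzero_v4i8(<4 x i8> %x) {
    %e0 = extractelement <4 x i8> %x, i32 0
    %e1 = extractelement <4 x i8> %x, i32 1
    %e2 = extractelement <4 x i8> %x, i32 2
    %e3 = extractelement <4 x i8> %x, i32 3
    %o0 = or i8 %e0, %e1
    %o1 = or i8 %e2, %e3
    %o2 = or i8 %o0, %o1
    %r = icmp eq i8 %o2, 0
    ret i1 %r
  }
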
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll b/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll
index 9550ff703c12..55a6a7022d0e 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-scalar.ll
@@ -358,34 +358,15 @@ define i1 @test_v16i64(ptr %ptr) nounwind {
 ;
 
 define i1 @test_v2i32(ptr %ptr) nounwind {
-; SSE2-LABEL: test_v2i32:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movq (%rdi), %rax
-; SSE2-NEXT:    movq %rax, %xmm0
-; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; SSE2-NEXT:    movd %xmm0, %ecx
-; SSE2-NEXT:    andl %eax, %ecx
-; SSE2-NEXT:    cmpl $-1, %ecx
-; SSE2-NEXT:    sete %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: test_v2i32:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shrq $32, %rcx
-; SSE41-NEXT:    andl %eax, %ecx
-; SSE41-NEXT:    cmpl $-1, %ecx
-; SSE41-NEXT:    sete %al
-; SSE41-NEXT:    retq
+; SSE-LABEL: test_v2i32:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cmpq $-1, (%rdi)
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i32:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    shrq $32, %rcx
-; AVX-NEXT:    andl %eax, %ecx
-; AVX-NEXT:    cmpl $-1, %ecx
+; AVX-NEXT:    cmpq $-1, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <2 x i32>, ptr %ptr
@@ -658,21 +639,13 @@ define i1 @test_v16i32(ptr %ptr) nounwind {
 define i1 @test_v2i16(ptr %ptr) nounwind {
 ; SSE-LABEL: test_v2i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movl (%rdi), %eax
-; SSE-NEXT:    movl %eax, %ecx
-; SSE-NEXT:    shrl $16, %ecx
-; SSE-NEXT:    andl %eax, %ecx
-; SSE-NEXT:    cmpw $-1, %cx
+; SSE-NEXT:    cmpl $-1, (%rdi)
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    movl %eax, %ecx
-; AVX-NEXT:    shrl $16, %ecx
-; AVX-NEXT:    andl %eax, %ecx
-; AVX-NEXT:    cmpw $-1, %cx
+; AVX-NEXT:    cmpl $-1, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <2 x i16>, ptr %ptr
@@ -686,31 +659,13 @@ define i1 @test_v2i16(ptr %ptr) nounwind {
 define i1 @test_v4i16(ptr %ptr) nounwind {
 ; SSE-LABEL: test_v4i16:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movq (%rdi), %rax
-; SSE-NEXT:    movq %rax, %rcx
-; SSE-NEXT:    movl %eax, %edx
-; SSE-NEXT:    shrl $16, %edx
-; SSE-NEXT:    andl %eax, %edx
-; SSE-NEXT:    shrq $32, %rax
-; SSE-NEXT:    shrq $48, %rcx
-; SSE-NEXT:    andl %ecx, %eax
-; SSE-NEXT:    andl %edx, %eax
-; SSE-NEXT:    cmpw $-1, %ax
+; SSE-NEXT:    cmpq $-1, (%rdi)
 ; SSE-NEXT:    sete %al
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v4i16:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    movl %eax, %edx
-; AVX-NEXT:    shrl $16, %edx
-; AVX-NEXT:    andl %eax, %edx
-; AVX-NEXT:    shrq $32, %rax
-; AVX-NEXT:    shrq $48, %rcx
-; AVX-NEXT:    andl %ecx, %eax
-; AVX-NEXT:    andl %edx, %eax
-; AVX-NEXT:    cmpw $-1, %ax
+; AVX-NEXT:    cmpq $-1, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <4 x i16>, ptr %ptr
@@ -914,34 +869,15 @@ define i1 @test_v16i16(ptr %ptr) nounwind {
 ;
 
 define i1 @test_v2i8(ptr %ptr) nounwind {
-; SSE2-LABEL: test_v2i8:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movzwl (%rdi), %eax
-; SSE2-NEXT:    movd %eax, %xmm0
-; SSE2-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT:    cmpb $-1, %al
-; SSE2-NEXT:    sete %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: test_v2i8:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movzwl (%rdi), %eax
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $8, %ecx
-; SSE41-NEXT:    andl %eax, %ecx
-; SSE41-NEXT:    cmpb $-1, %cl
-; SSE41-NEXT:    sete %al
-; SSE41-NEXT:    retq
+; SSE-LABEL: test_v2i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cmpw $-1, (%rdi)
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movzwl (%rdi), %eax
-; AVX-NEXT:    movl %eax, %ecx
-; AVX-NEXT:    shrl $8, %ecx
-; AVX-NEXT:    andl %eax, %ecx
-; AVX-NEXT:    cmpb $-1, %cl
+; AVX-NEXT:    cmpw $-1, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <2 x i8>, ptr %ptr
@@ -953,46 +889,15 @@ define i1 @test_v2i8(ptr %ptr) nounwind {
 }
 
 define i1 @test_v4i8(ptr %ptr) nounwind {
-; SSE2-LABEL: test_v4i8:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %cl
-; SSE2-NEXT:    andb %al, %cl
-; SSE2-NEXT:    cmpb $-1, %cl
-; SSE2-NEXT:    sete %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: test_v4i8:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movl (%rdi), %eax
-; SSE41-NEXT:    movl %eax, %ecx
-; SSE41-NEXT:    shrl $8, %ecx
-; SSE41-NEXT:    movl %eax, %edx
-; SSE41-NEXT:    andl %eax, %ecx
-; SSE41-NEXT:    shrl $16, %eax
-; SSE41-NEXT:    shrl $24, %edx
-; SSE41-NEXT:    andl %eax, %edx
-; SSE41-NEXT:    andl %edx, %ecx
-; SSE41-NEXT:    cmpb $-1, %cl
-; SSE41-NEXT:    sete %al
-; SSE41-NEXT:    retq
+; SSE-LABEL: test_v4i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cmpl $-1, (%rdi)
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v4i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movl (%rdi), %eax
-; AVX-NEXT:    movl %eax, %ecx
-; AVX-NEXT:    shrl $8, %ecx
-; AVX-NEXT:    movl %eax, %edx
-; AVX-NEXT:    andl %eax, %ecx
-; AVX-NEXT:    shrl $16, %eax
-; AVX-NEXT:    shrl $24, %edx
-; AVX-NEXT:    andl %eax, %edx
-; AVX-NEXT:    andl %edx, %ecx
-; AVX-NEXT:    cmpb $-1, %cl
+; AVX-NEXT:    cmpl $-1, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <4 x i8>, ptr %ptr
@@ -1008,76 +913,15 @@ define i1 @test_v4i8(ptr %ptr) nounwind {
 }
 
 define i1 @test_v8i8(ptr %ptr) nounwind {
-; SSE2-LABEL: test_v8i8:
-; SSE2:       # %bb.0:
-; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE2-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %ecx
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %edx
-; SSE2-NEXT:    movzbl -{{[0-9]+}}(%rsp), %esi
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %al
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %cl
-; SSE2-NEXT:    andb %al, %cl
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %dl
-; SSE2-NEXT:    andb -{{[0-9]+}}(%rsp), %sil
-; SSE2-NEXT:    andb %dl, %sil
-; SSE2-NEXT:    andb %cl, %sil
-; SSE2-NEXT:    cmpb $-1, %sil
-; SSE2-NEXT:    sete %al
-; SSE2-NEXT:    retq
-;
-; SSE41-LABEL: test_v8i8:
-; SSE41:       # %bb.0:
-; SSE41-NEXT:    movq (%rdi), %rax
-; SSE41-NEXT:    movq %rax, %rcx
-; SSE41-NEXT:    shrq $32, %rcx
-; SSE41-NEXT:    movq %rax, %rdx
-; SSE41-NEXT:    shrq $40, %rdx
-; SSE41-NEXT:    movq %rax, %rsi
-; SSE41-NEXT:    shrq $48, %rsi
-; SSE41-NEXT:    movq %rax, %rdi
-; SSE41-NEXT:    shrq $56, %rdi
-; SSE41-NEXT:    movl %eax, %r8d
-; SSE41-NEXT:    shrl $8, %r8d
-; SSE41-NEXT:    andl %eax, %r8d
-; SSE41-NEXT:    movl %eax, %r9d
-; SSE41-NEXT:    shrl $24, %r9d
-; SSE41-NEXT:    shrl $16, %eax
-; SSE41-NEXT:    andl %r9d, %eax
-; SSE41-NEXT:    andl %r8d, %eax
-; SSE41-NEXT:    andl %edx, %ecx
-; SSE41-NEXT:    andl %edi, %esi
-; SSE41-NEXT:    andl %ecx, %esi
-; SSE41-NEXT:    andl %eax, %esi
-; SSE41-NEXT:    cmpb $-1, %sil
-; SSE41-NEXT:    sete %al
-; SSE41-NEXT:    retq
+; SSE-LABEL: test_v8i8:
+; SSE:       # %bb.0:
+; SSE-NEXT:    cmpq $-1, (%rdi)
+; SSE-NEXT:    sete %al
+; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v8i8:
 ; AVX:       # %bb.0:
-; AVX-NEXT:    movq (%rdi), %rax
-; AVX-NEXT:    movq %rax, %rcx
-; AVX-NEXT:    shrq $32, %rcx
-; AVX-NEXT:    movq %rax, %rdx
-; AVX-NEXT:    shrq $40, %rdx
-; AVX-NEXT:    movq %rax, %rsi
-; AVX-NEXT:    shrq $48, %rsi
-; AVX-NEXT:    movq %rax, %rdi
-; AVX-NEXT:    shrq $56, %rdi
-; AVX-NEXT:    movl %eax, %r8d
-; AVX-NEXT:    shrl $8, %r8d
-; AVX-NEXT:    andl %eax, %r8d
-; AVX-NEXT:    movl %eax, %r9d
-; AVX-NEXT:    shrl $24, %r9d
-; AVX-NEXT:    shrl $16, %eax
-; AVX-NEXT:    andl %r9d, %eax
-; AVX-NEXT:    andl %r8d, %eax
-; AVX-NEXT:    andl %edx, %ecx
-; AVX-NEXT:    andl %edi, %esi
-; AVX-NEXT:    andl %ecx, %esi
-; AVX-NEXT:    andl %eax, %esi
-; AVX-NEXT:    cmpb $-1, %sil
+; AVX-NEXT:    cmpq $-1, (%rdi)
 ; AVX-NEXT:    sete %al
 ; AVX-NEXT:    retq
   %vload = load <8 x i8>, ptr %ptr

