[llvm] r368016 - [X86][SSE] Enable min/max partial reduction

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 6 04:00:34 PDT 2019


Author: rksimon
Date: Tue Aug  6 04:00:34 2019
New Revision: 368016

URL: http://llvm.org/viewvc/llvm-project?rev=368016&view=rev
Log:
[X86][SSE] Enable min/max partial reduction

As mentioned on D65047 / rL366933, the plan is to enable partial reduction handling wherever possible.
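
For context, the new X86-AVX check lines below come from the existing PHMINPOSUW lowering now being reachable for these partial reductions: a signed i16 max is converted into an unsigned i16 min by XORing every lane with 0x7FFF, running PHMINPOSUW, and undoing the bias on the scalar result (the bias is 0x8000 for smin and all-ones for umax). A minimal, hand-written C++/SSE4.1 sketch of that smax transform (illustrative only; the function name is made up and this is not code from the patch):

    #include <smmintrin.h>   // SSE4.1: _mm_minpos_epu16
    #include <cstdint>

    static int16_t reduce_smax_v8i16(__m128i v) {
      // x ^ 0x7FFF flips the sign bit (signed -> unsigned order) and inverts
      // the remaining bits (min <-> max), so the unsigned minimum of the
      // biased lanes is the signed maximum of the original lanes.
      const __m128i bias = _mm_set1_epi16(0x7FFF);
      __m128i biased = _mm_xor_si128(v, bias);
      __m128i minpos = _mm_minpos_epu16(biased); // min in lane 0, index beside it
      return (int16_t)(_mm_extract_epi16(minpos, 0) ^ 0x7FFF);
    }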

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
    llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
    llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
    llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=368016&r1=368015&r2=368016&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Aug  6 04:00:34 2019
@@ -35384,7 +35384,7 @@ static SDValue combineHorizontalMinMaxRe
   // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns.
   ISD::NodeType BinOp;
   SDValue Src = DAG.matchBinOpReduction(
-      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN});
+      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
   if (!Src)
     return SDValue();
 

Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll?rev=368016&r1=368015&r2=368016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smax.ll Tue Aug  6 04:00:34 2019
@@ -1859,13 +1859,10 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI12_0, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
+; X86-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -1894,13 +1891,10 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
+; X64-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -1942,13 +1936,10 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI13_0, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
+; X86-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -1977,13 +1968,10 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
+; X64-AVX-NEXT:    xorl $32767, %eax ## imm = 0x7FFF
 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -2047,15 +2035,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI14_0, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT:    xorb $127, %al
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2106,15 +2091,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT:    xorb $127, %al
 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -2181,15 +2163,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI15_0, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT:    xorb $127, %al
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2240,15 +2219,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT:    xorb $127, %al
 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll?rev=368016&r1=368015&r2=368016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-smin.ll Tue Aug  6 04:00:34 2019
@@ -1863,13 +1863,10 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI12_0, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
+; X86-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -1898,13 +1895,10 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
+; X64-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -1946,13 +1940,10 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI13_0, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
+; X86-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -1981,13 +1972,10 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminsw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
+; X64-AVX-NEXT:    xorl $32768, %eax ## imm = 0x8000
 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -2051,15 +2039,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI14_0, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT:    xorb $-128, %al
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2110,15 +2095,12 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT:    xorb $-128, %al
 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq
@@ -2185,15 +2167,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpxor LCPI15_0, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT:    xorb $-128, %al
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2244,15 +2223,12 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpxor {{.*}}(%rip), %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminsb %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX-NEXT:    xorb $-128, %al
 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT:    vzeroupper
 ; X64-AVX-NEXT:    retq

Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll?rev=368016&r1=368015&r2=368016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umax.ll Tue Aug  6 04:00:34 2019
@@ -2061,13 +2061,11 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
+; X86-AVX-NEXT:    notl %eax
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2104,18 +2102,37 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vmovd %xmm0, %eax
-; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT:    vzeroupper
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT:    vmovd %xmm0, %eax
+; X64-AVX1-NEXT:    notl %eax
+; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX2:       ## %bb.0:
+; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovd %xmm0, %eax
+; X64-AVX2-NEXT:    notl %eax
+; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: test_reduce_v16i16_v8i16:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT:    vmovd %xmm0, %eax
+; X64-AVX512-NEXT:    notl %eax
+; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
   %1  = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2  = icmp ugt <16 x i16> %a0, %1
   %3  = select <16 x i1> %2, <16 x i16> %a0, <16 x i16> %1
@@ -2164,13 +2181,11 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
+; X86-AVX-NEXT:    notl %eax
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2207,18 +2222,37 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ; X64-SSE42-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-SSE42-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vmovd %xmm0, %eax
-; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
-; X64-AVX-NEXT:    vzeroupper
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT:    vmovd %xmm0, %eax
+; X64-AVX1-NEXT:    notl %eax
+; X64-AVX1-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX2:       ## %bb.0:
+; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovd %xmm0, %eax
+; X64-AVX2-NEXT:    notl %eax
+; X64-AVX2-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i16_v8i16:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT:    vmovd %xmm0, %eax
+; X64-AVX512-NEXT:    notl %eax
+; X64-AVX512-NEXT:    ## kill: def $ax killed $ax killed $eax
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
   %1  = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2  = icmp ugt <32 x i16> %a0, %1
   %3  = select <32 x i1> %2, <32 x i16> %a0, <32 x i16> %1
@@ -2264,15 +2298,13 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT:    notb %al
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2306,20 +2338,43 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT:    vzeroupper
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT:    notb %al
+; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
+; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX2:       ## %bb.0:
+; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT:    notb %al
+; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: test_reduce_v32i8_v16i8:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT:    notb %al
+; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
   %1  = shufflevector <32 x i8> %a0, <32 x i8> undef, <32 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2  = icmp ugt <32 x i8> %a0, %1
   %3  = select <32 x i1> %2, <32 x i8> %a0, <32 x i8> %1
@@ -2368,15 +2423,13 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X86-AVX-NEXT:    vpxor %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
+; X86-AVX-NEXT:    notb %al
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
 ; X86-AVX-NEXT:    retl
@@ -2410,20 +2463,43 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ; X64-SSE42-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-SSE42-NEXT:    retq
 ;
-; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
-; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
-; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
-; X64-AVX-NEXT:    vzeroupper
-; X64-AVX-NEXT:    retq
+; X64-AVX1-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX1:       ## %bb.0:
+; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX1-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX1-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX1-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX1-NEXT:    notb %al
+; X64-AVX1-NEXT:    ## kill: def $al killed $al killed $eax
+; X64-AVX1-NEXT:    vzeroupper
+; X64-AVX1-NEXT:    retq
+;
+; X64-AVX2-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX2:       ## %bb.0:
+; X64-AVX2-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
+; X64-AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX2-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX2-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX2-NEXT:    notb %al
+; X64-AVX2-NEXT:    ## kill: def $al killed $al killed $eax
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512-LABEL: test_reduce_v64i8_v16i8:
+; X64-AVX512:       ## %bb.0:
+; X64-AVX512-NEXT:    vpternlogq $15, %xmm0, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpsrlw $8, %xmm0, %xmm1
+; X64-AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX512-NEXT:    vphminposuw %xmm0, %xmm0
+; X64-AVX512-NEXT:    vpextrb $0, %xmm0, %eax
+; X64-AVX512-NEXT:    notb %al
+; X64-AVX512-NEXT:    ## kill: def $al killed $al killed $eax
+; X64-AVX512-NEXT:    vzeroupper
+; X64-AVX512-NEXT:    retq
   %1  = shufflevector <64 x i8> %a0, <64 x i8> undef, <64 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   %2  = icmp ugt <64 x i8> %a0, %1
   %3  = select <64 x i1> %2, <64 x i8> %a0, <64 x i8> %1

Modified: llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll?rev=368016&r1=368015&r2=368016&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll (original)
+++ llvm/trunk/test/CodeGen/X86/horizontal-reduce-umin.ll Tue Aug  6 04:00:34 2019
@@ -1922,12 +1922,7 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X86-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
@@ -1964,12 +1959,7 @@ define i16 @test_reduce_v16i16_v8i16(<16
 ;
 ; X64-AVX-LABEL: test_reduce_v16i16_v8i16:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT:    vzeroupper
@@ -2019,12 +2009,7 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X86-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vmovd %xmm0, %eax
 ; X86-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X86-AVX-NEXT:    vzeroupper
@@ -2061,12 +2046,7 @@ define i16 @test_reduce_v32i16_v8i16(<32
 ;
 ; X64-AVX-LABEL: test_reduce_v32i16_v8i16:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminuw %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vmovd %xmm0, %eax
 ; X64-AVX-NEXT:    ## kill: def $ax killed $ax killed $eax
 ; X64-AVX-NEXT:    vzeroupper
@@ -2113,14 +2093,9 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X86-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
@@ -2154,14 +2129,9 @@ define i8 @test_reduce_v32i8_v16i8(<32 x
 ;
 ; X64-AVX-LABEL: test_reduce_v32i8_v16i8:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT:    vzeroupper
@@ -2211,14 +2181,9 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X86-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X86-AVX:       ## %bb.0:
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X86-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X86-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X86-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X86-AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; X86-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X86-AVX-NEXT:    vzeroupper
@@ -2252,14 +2217,9 @@ define i8 @test_reduce_v64i8_v16i8(<64 x
 ;
 ; X64-AVX-LABEL: test_reduce_v64i8_v16i8:
 ; X64-AVX:       ## %bb.0:
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
-; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
-; X64-AVX-NEXT:    vpsrld $16, %xmm0, %xmm1
-; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpsrlw $8, %xmm0, %xmm1
 ; X64-AVX-NEXT:    vpminub %xmm1, %xmm0, %xmm0
+; X64-AVX-NEXT:    vphminposuw %xmm0, %xmm0
 ; X64-AVX-NEXT:    vpextrb $0, %xmm0, %eax
 ; X64-AVX-NEXT:    ## kill: def $al killed $al killed $eax
 ; X64-AVX-NEXT:    vzeroupper



