[llvm] r373154 - [X86] Match (or (and A, B), (andn A, C)) to VPTERNLOG with AVX512.

Craig Topper via llvm-commits <llvm-commits at lists.llvm.org>
Sat Sep 28 18:24:17 PDT 2019


Author: ctopper
Date: Sat Sep 28 18:24:16 2019
New Revision: 373154

URL: http://llvm.org/viewvc/llvm-project?rev=373154&view=rev
Log:
[X86] Match (or (and A, B), (andn A, C)) to VPTERNLOG with AVX512.

This uses an isel pattern similar to the one we used for vpcmov with XOP.
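
VPTERNLOG's 8-bit immediate is the truth table of the desired three-input
boolean function: bit (a<<2)|(b<<1)|c of the immediate is the result for
input bits a, b, c taken from src1, src2, src3. Evaluating the matched
expression on the constants 0xF0, 0xCC, 0xAA (the truth tables of the three
inputs themselves) therefore yields the immediate directly. A minimal C
sketch, not part of the commit, showing where the 202 (0xCA) in the patterns
below comes from:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Truth tables of the three sources over all 8 (a,b,c) input
   combinations, with the src1 bit as the high bit of the index. */
#define TT_A 0xF0u
#define TT_B 0xCCu
#define TT_C 0xAAu

int main(void) {
  /* (A & B) | (~A & C): a bitwise select, A picks B or C per bit. */
  uint8_t imm = (TT_A & TT_B) | (uint8_t)(~TT_A & TT_C);
  printf("imm = %u (0x%02X)\n", (unsigned)imm, (unsigned)imm); /* 202, 0xCA */

  /* Cross-check against the per-index definition of the immediate. */
  for (unsigned idx = 0; idx < 8; ++idx) {
    unsigned a = (idx >> 2) & 1, b = (idx >> 1) & 1, c = idx & 1;
    assert(((imm >> idx) & 1) == (((a & b) | (~a & c)) & 1));
  }
  return 0;
}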

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll
    llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll
    llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll
    llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll
    llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=373154&r1=373153&r2=373154&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Sep 28 18:24:16 2019
@@ -11533,6 +11533,49 @@ let Predicates = [HasVLX] in {
             (VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
 }
 
+let Predicates = [HasVLX] in {
+  def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+  def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+  def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+  def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
+                       (X86andnp VR128X:$src1, VR128X:$src3))),
+            (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
+
+  def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
+                       (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+  def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
+                        (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+  def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
+                       (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+  def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
+                       (X86andnp VR256X:$src1, VR256X:$src3))),
+            (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
+}
+
+let Predicates = [HasAVX512] in {
+  def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
+                       (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+  def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
+                        (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+  def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
+                        (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+  def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
+                       (X86andnp VR512:$src1, VR512:$src3))),
+            (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX-512 - FixupImm
 //===----------------------------------------------------------------------===//
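
One note on the test updates that follow: they show vpternlogq $226 (0xE2)
rather than the $202 written in the patterns above. VPTERNLOG's sources can
be commuted as long as the immediate is remapped to match, and swapping src1
and src2 turns the src1-keyed select into the same select keyed on src2. My
reading of the diffs (the commit message does not say so explicitly) is that
LLVM commutes here so the tied destination is the vpsllw result rather than
the mask register, which is still live for the other half of the vector. A
small C sketch of the immediate remap, under the same truth-table encoding
as above:

#include <assert.h>
#include <stdint.h>

#define TT_A 0xF0u  /* truth table of src1 */
#define TT_B 0xCCu  /* truth table of src2 */
#define TT_C 0xAAu  /* truth table of src3 */

int main(void) {
  /* As written in the .td patterns: src1 selects between src2 and src3. */
  uint8_t sel_src1 = (TT_A & TT_B) | (uint8_t)(~TT_A & TT_C);
  /* src1 <-> src2 commuted form: src2 selects between src1 and src3. */
  uint8_t sel_src2 = (TT_B & TT_A) | (uint8_t)(~TT_B & TT_C);
  assert(sel_src1 == 202 && sel_src2 == 226);
  return 0;
}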

Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll?rev=373154&r1=373153&r2=373154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll Sat Sep 28 18:24:16 2019
@@ -1548,16 +1548,12 @@ define <64 x i8> @splatconstant_funnnel_
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm4, %ymm3
 ; AVX512VL-NEXT:    vpsllw $4, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpand %ymm4, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpor %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm4, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpandn %ymm1, %ymm4, %ymm1
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $226, %ymm1, %ymm4, %ymm0
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll?rev=373154&r1=373153&r2=373154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll Sat Sep 28 18:24:16 2019
@@ -172,47 +172,39 @@ define <64 x i8> @var_funnnel_v64i8(<64
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm5, %ymm4
-; AVX512VL-NEXT:    vpsllw $4, %ymm3, %ymm6
-; AVX512VL-NEXT:    vpand %ymm5, %ymm6, %ymm6
-; AVX512VL-NEXT:    vpor %ymm4, %ymm6, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
-; AVX512VL-NEXT:    vpand %ymm6, %ymm2, %ymm2
+; AVX512VL-NEXT:    vpsllw $4, %ymm3, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm4, %ymm6, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
+; AVX512VL-NEXT:    vpand %ymm4, %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpsllw $5, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpsrlw $6, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm7, %ymm4
-; AVX512VL-NEXT:    vpsllw $2, %ymm3, %ymm8
-; AVX512VL-NEXT:    vpand %ymm7, %ymm8, %ymm8
-; AVX512VL-NEXT:    vpor %ymm4, %ymm8, %ymm4
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpsrlw $6, %ymm3, %ymm5
+; AVX512VL-NEXT:    vpsllw $2, %ymm3, %ymm7
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm5, %ymm8, %ymm7
 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT:    vpand %ymm8, %ymm4, %ymm4
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm7, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpsrlw $7, %ymm3, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT:    vpand %ymm7, %ymm5, %ymm5
 ; AVX512VL-NEXT:    vpaddb %ymm3, %ymm3, %ymm9
-; AVX512VL-NEXT:    vpor %ymm4, %ymm9, %ymm4
+; AVX512VL-NEXT:    vpor %ymm5, %ymm9, %ymm5
 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm5, %ymm3
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT:    vpand %ymm6, %ymm1, %ymm1
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm5
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm6, %ymm5
+; AVX512VL-NEXT:    vpand %ymm4, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm5, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm7, %ymm3
 ; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpand %ymm7, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm8, %ymm4
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpand %ymm8, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpand %ymm7, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
@@ -826,16 +818,12 @@ define <64 x i8> @splatconstant_funnnel_
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm0
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll?rev=373154&r1=373153&r2=373154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll Sat Sep 28 18:24:16 2019
@@ -1532,16 +1532,12 @@ define <64 x i8> @splatconstant_funnnel_
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm2
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm4, %ymm3
 ; AVX512VL-NEXT:    vpsllw $4, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpand %ymm4, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpor %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm4 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm4, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpandn %ymm1, %ymm4, %ymm1
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm1, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $226, %ymm1, %ymm4, %ymm0
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll?rev=373154&r1=373153&r2=373154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll Sat Sep 28 18:24:16 2019
@@ -181,50 +181,42 @@ define <64 x i8> @var_funnnel_v64i8(<64
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm5, %ymm4
-; AVX512VL-NEXT:    vpsllw $4, %ymm3, %ymm6
-; AVX512VL-NEXT:    vpand %ymm5, %ymm6, %ymm6
-; AVX512VL-NEXT:    vpor %ymm4, %ymm6, %ymm4
-; AVX512VL-NEXT:    vpxor %xmm6, %xmm6, %xmm6
-; AVX512VL-NEXT:    vpsubb %ymm2, %ymm6, %ymm2
+; AVX512VL-NEXT:    vpsllw $4, %ymm3, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm4, %ymm6, %ymm5
+; AVX512VL-NEXT:    vpxor %xmm4, %xmm4, %xmm4
+; AVX512VL-NEXT:    vpsubb %ymm2, %ymm4, %ymm2
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7]
 ; AVX512VL-NEXT:    vpand %ymm7, %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpsllw $5, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpsrlw $6, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm8, %ymm4
-; AVX512VL-NEXT:    vpsllw $2, %ymm3, %ymm9
-; AVX512VL-NEXT:    vpand %ymm8, %ymm9, %ymm9
-; AVX512VL-NEXT:    vpor %ymm4, %ymm9, %ymm4
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpsrlw $6, %ymm3, %ymm5
+; AVX512VL-NEXT:    vpsllw $2, %ymm3, %ymm8
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm9 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm5, %ymm9, %ymm8
 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
-; AVX512VL-NEXT:    vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT:    vpand %ymm9, %ymm4, %ymm4
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm8, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpsrlw $7, %ymm3, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT:    vpand %ymm8, %ymm5, %ymm5
 ; AVX512VL-NEXT:    vpaddb %ymm3, %ymm3, %ymm10
-; AVX512VL-NEXT:    vpor %ymm4, %ymm10, %ymm4
+; AVX512VL-NEXT:    vpor %ymm5, %ymm10, %ymm5
 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm5, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm5, %ymm3
-; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
-; AVX512VL-NEXT:    vpsubb %ymm1, %ymm6, %ymm1
+; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm5
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm6, %ymm5
+; AVX512VL-NEXT:    vpsubb %ymm1, %ymm4, %ymm1
 ; AVX512VL-NEXT:    vpand %ymm7, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm5, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm8, %ymm3
 ; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpand %ymm8, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm9, %ymm4
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpand %ymm9, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpand %ymm8, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
@@ -846,16 +838,12 @@ define <64 x i8> @splatconstant_funnnel_
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm0
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll?rev=373154&r1=373153&r2=373154&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll Sat Sep 28 18:24:16 2019
@@ -167,44 +167,36 @@ define <64 x i8> @var_rotate_v64i8(<64 x
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm5, %ymm4
-; AVX512VL-NEXT:    vpsllw $4, %ymm3, %ymm6
-; AVX512VL-NEXT:    vpand %ymm5, %ymm6, %ymm6
-; AVX512VL-NEXT:    vpor %ymm4, %ymm6, %ymm4
+; AVX512VL-NEXT:    vpsllw $4, %ymm3, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm4, %ymm6, %ymm5
 ; AVX512VL-NEXT:    vpsllw $5, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $6, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm6 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm6, %ymm4
-; AVX512VL-NEXT:    vpsllw $2, %ymm3, %ymm7
-; AVX512VL-NEXT:    vpand %ymm6, %ymm7, %ymm7
-; AVX512VL-NEXT:    vpor %ymm4, %ymm7, %ymm4
+; AVX512VL-NEXT:    vpsllw $2, %ymm3, %ymm5
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm4, %ymm7, %ymm5
 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm5, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpsrlw $7, %ymm3, %ymm4
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-NEXT:    vpand %ymm7, %ymm4, %ymm4
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-NEXT:    vpand %ymm5, %ymm4, %ymm4
 ; AVX512VL-NEXT:    vpaddb %ymm3, %ymm3, %ymm8
 ; AVX512VL-NEXT:    vpor %ymm4, %ymm8, %ymm4
 ; AVX512VL-NEXT:    vpaddb %ymm2, %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm5, %ymm3
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpand %ymm5, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm6, %ymm4
 ; AVX512VL-NEXT:    vpsllw $5, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpsrlw $6, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpandn %ymm3, %ymm6, %ymm3
 ; AVX512VL-NEXT:    vpsllw $2, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpand %ymm6, %ymm4, %ymm4
-; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
+; AVX512VL-NEXT:    vpternlogq $226, %ymm3, %ymm7, %ymm4
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpblendvb %ymm1, %ymm4, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm3
-; AVX512VL-NEXT:    vpand %ymm7, %ymm3, %ymm3
+; AVX512VL-NEXT:    vpand %ymm5, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm0, %ymm0, %ymm4
 ; AVX512VL-NEXT:    vpor %ymm3, %ymm4, %ymm3
 ; AVX512VL-NEXT:    vpaddb %ymm1, %ymm1, %ymm1
@@ -815,16 +807,12 @@ define <64 x i8> @splatconstant_rotate_v
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm0
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
@@ -964,18 +952,14 @@ define <64 x i8> @splatconstant_rotate_m
 ; AVX512VL:       # %bb.0:
 ; AVX512VL-NEXT:    vextracti64x4 $1, %zmm0, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm1, %ymm2
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512VL-NEXT:    vpandn %ymm2, %ymm3, %ymm2
 ; AVX512VL-NEXT:    vpsllw $4, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpand %ymm3, %ymm1, %ymm1
-; AVX512VL-NEXT:    vpor %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT:    vpternlogq $226, %ymm2, %ymm3, %ymm1
 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
 ; AVX512VL-NEXT:    vpand %ymm2, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpsrlw $4, %ymm0, %ymm4
-; AVX512VL-NEXT:    vpandn %ymm4, %ymm3, %ymm4
 ; AVX512VL-NEXT:    vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT:    vpor %ymm4, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $226, %ymm4, %ymm3, %ymm0
 ; AVX512VL-NEXT:    vpand %ymm2, %ymm0, %ymm0
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
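
For anyone who wants to sanity-check an immediate from C, the AVX-512
ternary-logic intrinsics take the same 8-bit truth table. A hedged example,
not part of this commit's tests; it assumes a machine with AVX512F and
AVX512VL and a compiler flag such as -mavx512vl:

#include <assert.h>
#include <immintrin.h>
#include <string.h>

/* Build with: cc -O2 -mavx512f -mavx512vl check_ternlog.c
   Verifies that ternary logic with imm 202 computes (a & b) | (~a & c). */
int main(void) {
  __m256i a = _mm256_set1_epi8((char)0xF0);
  __m256i b = _mm256_set1_epi32(0x12345678);
  __m256i c = _mm256_set1_epi32(0x0BADF00D);

  __m256i tern = _mm256_ternarylogic_epi64(a, b, c, 202);
  __m256i ref  = _mm256_or_si256(_mm256_and_si256(a, b),
                                 _mm256_andnot_si256(a, c)); /* ~a & c */

  assert(memcmp(&tern, &ref, sizeof(tern)) == 0);
  return 0;
}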