[llvm] 122a45f - [X86] Add isel patterns for matching broadcast vpternlog if the ternlog and the broadcast have different types.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 10 15:15:34 PDT 2020


Author: Craig Topper
Date: 2020-07-10T15:15:02-07:00
New Revision: 122a45fbac059be0fb88b2b909191d7a93ce9c09

URL: https://github.com/llvm/llvm-project/commit/122a45fbac059be0fb88b2b909191d7a93ce9c09
DIFF: https://github.com/llvm/llvm-project/commit/122a45fbac059be0fb88b2b909191d7a93ce9c09.diff

LOG: [X86] Add isel patterns for matching broadcast vpternlog if the ternlog and the broadcast have different types.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/test/CodeGen/X86/vector-fshl-128.ll
    llvm/test/CodeGen/X86/vector-fshl-256.ll
    llvm/test/CodeGen/X86/vector-fshl-512.ll
    llvm/test/CodeGen/X86/vector-fshr-128.ll
    llvm/test/CodeGen/X86/vector-fshr-256.ll
    llvm/test/CodeGen/X86/vector-fshr-512.ll
    llvm/test/CodeGen/X86/vector-shuffle-avx512.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0921a0e51668..a3ad0b1c8dd6 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -11365,6 +11365,36 @@ let Predicates = [HasVLX] in {
             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;
 
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v16i8 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
+                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v16i8 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i8 (X86vpternlog VR128X:$src1,
+                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
   def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
                                  (i8 timm:$src4))),
             (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
@@ -11382,6 +11412,66 @@ let Predicates = [HasVLX] in {
             (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;
 
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v8i16 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
+                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v8i16 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v8i16 (X86vpternlog VR128X:$src1,
+                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v4i32 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v4i32 (X86vpternlog (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v4i32 (X86vpternlog VR128X:$src1,
+                                 (bitconvert (v2i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v2i64 (X86vpternlog VR128X:$src1, VR128X:$src2,
+                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v2i64 (X86vpternlog (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR128X:$src2, VR128X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v2i64 (X86vpternlog VR128X:$src1,
+                                 (bitconvert (v4i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR128X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZ128rmbi VR128X:$src1, VR128X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
   def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                  (i8 timm:$src4))),
             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@@ -11399,6 +11489,36 @@ let Predicates = [HasVLX] in {
             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;
 
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v32i8 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
+                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v32i8 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i8 (X86vpternlog VR256X:$src1,
+                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
   def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
                                   (i8 timm:$src4))),
             (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
@@ -11415,6 +11535,66 @@ let Predicates = [HasVLX] in {
                                   VR256X:$src2, (i8 timm:$src4))),
             (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
                                (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v16i16 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
+                                  (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                  VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v16i16 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i16 (X86vpternlog VR256X:$src1,
+                                  (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v8i32 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v8i32 (X86vpternlog (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v8i32 (X86vpternlog VR256X:$src1,
+                                 (bitconvert (v4i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v4i64 (X86vpternlog VR256X:$src1, VR256X:$src2,
+                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v4i64 (X86vpternlog (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR256X:$src2, VR256X:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v4i64 (X86vpternlog VR256X:$src1,
+                                 (bitconvert (v8i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR256X:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZ256rmbi VR256X:$src1, VR256X:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
 }
 
 let Predicates = [HasAVX512] in {
@@ -11435,6 +11615,36 @@ let Predicates = [HasAVX512] in {
             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;
 
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             timm:$src4)>;
+  def : Pat<(v64i8 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1,
+                                 (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+                                 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                 (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                                timm:$src4)>;
+  def : Pat<(v64i8 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v64i8 (X86vpternlog VR512:$src1,
+                                 (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                 VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                                (VPTERNLOG132_imm8 timm:$src4))>;
+
   def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
                                   (i8 timm:$src4))),
             (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
@@ -11448,9 +11658,84 @@ let Predicates = [HasAVX512] in {
             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG321_imm8 timm:$src4))>;
   def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
-                                 VR512:$src2, (i8 timm:$src4))),
+                                  VR512:$src2, (i8 timm:$src4))),
             (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
                             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                  VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             timm:$src4)>;
+  def : Pat<(v32i16 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v32i16 (X86vpternlog VR512:$src1,
+                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                  VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v16i32 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             timm:$src4)>;
+  def : Pat<(v16i32 (X86vpternlog (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v16i32 (X86vpternlog VR512:$src1,
+                                  (bitconvert (v8i64 (X86VBroadcastld64 addr:$src3))),
+                                  VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGQZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             (VPTERNLOG132_imm8 timm:$src4))>;
+
+  def : Pat<(v8i64 (X86vpternlog VR512:$src1, VR512:$src2,
+                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                  (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                             timm:$src4)>;
+  def : Pat<(v8i64 (X86vpternlog (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                 VR512:$src2, VR512:$src1, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                                (VPTERNLOG321_imm8 timm:$src4))>;
+  def : Pat<(v8i64 (X86vpternlog VR512:$src1,
+                                  (bitconvert (v16i32 (X86VBroadcastld32 addr:$src3))),
+                                  VR512:$src2, (i8 timm:$src4))),
+            (VPTERNLOGDZrmbi VR512:$src1, VR512:$src2, addr:$src3,
+                               (VPTERNLOG132_imm8 timm:$src4))>;
 }
 
 // Patterns to implement vnot using vpternlog instead of creating all ones

diff  --git a/llvm/test/CodeGen/X86/vector-fshl-128.ll b/llvm/test/CodeGen/X86/vector-fshl-128.ll
index b2ad1b33384e..d8442048f65e 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-128.ll
@@ -2905,8 +2905,7 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %zmm2, %zmm2
 ; AVX512VL-NEXT:    vpord %zmm1, %zmm2, %zmm1
 ; AVX512VL-NEXT:    vpmovdb %zmm1, %xmm1
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm2 = [18446744073709551360,18446744073709551360]
-; AVX512VL-NEXT:    vpternlogq $216, %xmm2, %xmm1, %xmm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip){1to2}, %xmm1, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;

diff  --git a/llvm/test/CodeGen/X86/vector-fshl-256.ll b/llvm/test/CodeGen/X86/vector-fshl-256.ll
index 674b064100c4..12feea765898 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-256.ll
@@ -2376,8 +2376,7 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
 ; AVX512VL-NEXT:    vpsrlw $8, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpackuswb %ymm4, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vpor %ymm1, %ymm2, %ymm1
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} ymm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
-; AVX512VL-NEXT:    vpternlogq $216, %ymm2, %ymm1, %ymm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip){1to4}, %ymm1, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v32i8:

diff  --git a/llvm/test/CodeGen/X86/vector-fshl-512.ll b/llvm/test/CodeGen/X86/vector-fshl-512.ll
index 09a29fdbaad4..6e0cb76398df 100644
--- a/llvm/test/CodeGen/X86/vector-fshl-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshl-512.ll
@@ -1184,8 +1184,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX512F-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm3, %zmm1, %zmm1
 ; AVX512F-NEXT:    vporq %zmm1, %zmm2, %zmm1
-; AVX512F-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
-; AVX512F-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
+; AVX512F-NEXT:    vpternlogq $216, {{.*}}(%rip){1to8}, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v64i8:
@@ -1236,8 +1235,7 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX512VL-NEXT:    vpackuswb %ymm5, %ymm1, %ymm1
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm3, %zmm1, %zmm1
 ; AVX512VL-NEXT:    vporq %zmm1, %zmm2, %zmm1
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} zmm2 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
-; AVX512VL-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
+; AVX512VL-NEXT:    vpternlogq $216, {{.*}}(%rip){1to8}, %zmm1, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v64i8:

diff  --git a/llvm/test/CodeGen/X86/vector-fshr-128.ll b/llvm/test/CodeGen/X86/vector-fshr-128.ll
index 23fbc5e70707..b7cc39a32d71 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-128.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-128.ll
@@ -2651,9 +2651,8 @@ define <16 x i8> @constant_funnnel_v16i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; AVX512VL-NEXT:    vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
 ; AVX512VL-NEXT:    vpsllvd {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512VL-NEXT:    vpord %zmm2, %zmm0, %zmm0
-; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm2
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} xmm0 = [18446744073709551360,18446744073709551360]
-; AVX512VL-NEXT:    vpternlogq $202, %xmm1, %xmm2, %xmm0
+; AVX512VL-NEXT:    vpmovdb %zmm0, %xmm0
+; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip){1to2}, %xmm1, %xmm0
 ; AVX512VL-NEXT:    vzeroupper
 ; AVX512VL-NEXT:    retq
 ;

diff  --git a/llvm/test/CodeGen/X86/vector-fshr-256.ll b/llvm/test/CodeGen/X86/vector-fshr-256.ll
index bd5698bc63be..bbeaed5cc725 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-256.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-256.ll
@@ -2083,9 +2083,8 @@ define <32 x i8> @constant_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
 ; AVX512VL-NEXT:    vpmullw {{.*}}(%rip), %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpsrlw $8, %ymm2, %ymm2
 ; AVX512VL-NEXT:    vpackuswb %ymm3, %ymm2, %ymm2
-; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm2
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} ymm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
-; AVX512VL-NEXT:    vpternlogq $202, %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    vpor %ymm2, %ymm0, %ymm0
+; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip){1to4}, %ymm1, %ymm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v32i8:

diff  --git a/llvm/test/CodeGen/X86/vector-fshr-512.ll b/llvm/test/CodeGen/X86/vector-fshr-512.ll
index 3337ebe22fed..c89782bc359c 100644
--- a/llvm/test/CodeGen/X86/vector-fshr-512.ll
+++ b/llvm/test/CodeGen/X86/vector-fshr-512.ll
@@ -1171,9 +1171,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX512F-NEXT:    vpsrlw $8, %ymm3, %ymm3
 ; AVX512F-NEXT:    vpackuswb %ymm4, %ymm3, %ymm3
 ; AVX512F-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-NEXT:    vporq %zmm2, %zmm0, %zmm2
-; AVX512F-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
-; AVX512F-NEXT:    vpternlogq $202, %zmm1, %zmm2, %zmm0
+; AVX512F-NEXT:    vporq %zmm2, %zmm0, %zmm0
+; AVX512F-NEXT:    vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: constant_funnnel_v64i8:
@@ -1223,9 +1222,8 @@ define <64 x i8> @constant_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y) nounwind {
 ; AVX512VL-NEXT:    vpsrlw $8, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vpackuswb %ymm4, %ymm3, %ymm3
 ; AVX512VL-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512VL-NEXT:    vporq %zmm2, %zmm0, %zmm2
-; AVX512VL-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360,18446744073709551360]
-; AVX512VL-NEXT:    vpternlogq $202, %zmm1, %zmm2, %zmm0
+; AVX512VL-NEXT:    vporq %zmm2, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpternlogq $228, {{.*}}(%rip){1to8}, %zmm1, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: constant_funnnel_v64i8:

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
index 1ab6f2cc45fc..cb2dd3ef7e86 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-avx512.ll
@@ -337,11 +337,15 @@ define <32 x i16> @test_mm512_mask_blend_epi16(<32 x i16> %A, <32 x i16> %W){
 ; SKX-NEXT:    vpblendmw %zmm0, %zmm1, %zmm0 {%k1}
 ; SKX-NEXT:    ret{{[l|q]}}
 ;
-; KNL-LABEL: test_mm512_mask_blend_epi16:
-; KNL:       # %bb.0: # %entry
-; KNL-NEXT:    vpbroadcastd {{.*#+}} zmm2 = [65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535,65535]
-; KNL-NEXT:    vpternlogq $216, %zmm2, %zmm1, %zmm0
-; KNL-NEXT:    ret{{[l|q]}}
+; KNL64-LABEL: test_mm512_mask_blend_epi16:
+; KNL64:       # %bb.0: # %entry
+; KNL64-NEXT:    vpternlogd $216, {{.*}}(%rip){1to16}, %zmm1, %zmm0
+; KNL64-NEXT:    retq
+;
+; KNL32-LABEL: test_mm512_mask_blend_epi16:
+; KNL32:       # %bb.0: # %entry
+; KNL32-NEXT:    vpternlogd $216, {{\.LCPI.*}}{1to16}, %zmm1, %zmm0
+; KNL32-NEXT:    retl
 entry:
   %0 = shufflevector <32 x i16> %A, <32 x i16> %W, <32 x i32>  <i32 32, i32 1, i32 34, i32 3, i32 36, i32 5, i32 38, i32 7, i32 40, i32 9, i32 42, i32 11, i32 44, i32 13, i32 46, i32 15, i32 48, i32 17, i32 50, i32 19, i32 52, i32 21, i32 54, i32 23, i32 56, i32 25, i32 58, i32 27, i32 60, i32 29, i32 62, i32 31>
   ret <32 x i16> %0


        


More information about the llvm-commits mailing list