[llvm] r295634 - [AVX-512] Add more VPTERNLOG patterns to enable folding of broadcast loads that aren't in operand 2.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 19 18:47:42 PST 2017


Author: ctopper
Date: Sun Feb 19 20:47:42 2017
New Revision: 295634

URL: http://llvm.org/viewvc/llvm-project?rev=295634&view=rev
Log:
[AVX-512] Add more VPTERNLOG patterns to enable folding of broadcast loads that aren't in operand 2.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=295634&r1=295633&r2=295634&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Feb 19 20:47:42 2017
@@ -8992,6 +8992,45 @@ multiclass avx512_ternlog<bits<8> opc, s
                    _.RC:$src1)),
             (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
              _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+  // Additional patterns for matching broadcasts in other positions.
+  def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+                          _.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
+                                   addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+  def : Pat<(_.VT (OpNode _.RC:$src1,
+                          (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+                          _.RC:$src2, (i8 imm:$src4))),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
+                                   addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+
+  // Additional patterns for matching zero masking with broadcasts in other
+  // positions.
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+                    _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
+             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
+             (VPTERNLOG321_imm8 imm:$src4))>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode _.RC:$src1,
+                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+                    _.RC:$src2, (i8 imm:$src4)),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
+             _.KRCWM:$mask, _.RC:$src2, addr:$src3,
+             (VPTERNLOG132_imm8 imm:$src4))>;
+
+  // Additional patterns for matching masked broadcasts with different
+  // operand orders.
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (OpNode _.RC:$src1,
+                    (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+                    _.RC:$src2, (i8 imm:$src4)),
+                   _.RC:$src1)),
+            (!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
+             _.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
 }
 
 multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{

Modified: llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll?rev=295634&r1=295633&r2=295634&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll Sun Feb 19 20:47:42 2017
@@ -521,8 +521,7 @@ define <16 x i32> @vpternlog_v16i32_021_
 define <16 x i32> @vpternlog_v16i32_012_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
 ; CHECK-LABEL: vpternlog_v16i32_012_broadcast0:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $9, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x0_scalar = load i32, i32* %ptr_x0
   %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
@@ -534,8 +533,7 @@ define <16 x i32> @vpternlog_v16i32_012_
 define <16 x i32> @vpternlog_v16i32_012_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
 ; CHECK-LABEL: vpternlog_v16i32_012_broadcast1:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x1_scalar = load i32, i32* %ptr_x1
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
@@ -559,8 +557,7 @@ define <16 x i32> @vpternlog_v16i32_012_
 define <16 x i32> @vpternlog_v16i32_102_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
 ; CHECK-LABEL: vpternlog_v16i32_102_broadcast0:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x0_scalar = load i32, i32* %ptr_x0
   %vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
@@ -572,8 +569,7 @@ define <16 x i32> @vpternlog_v16i32_102_
 define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
 ; CHECK-LABEL: vpternlog_v16i32_102_broadcast1:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $9, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x1_scalar = load i32, i32* %ptr_x1
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
@@ -609,9 +605,7 @@ define <16 x i32> @vpternlog_v16i32_210_
 define <16 x i32> @vpternlog_v16i32_210_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
 ; CHECK-LABEL: vpternlog_v16i32_210_broadcast1:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $9, %zmm0, %zmm1, %zmm2
-; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x1_scalar = load i32, i32* %ptr_x1
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
@@ -623,8 +617,7 @@ define <16 x i32> @vpternlog_v16i32_210_
 define <16 x i32> @vpternlog_v16i32_210_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
 ; CHECK-LABEL: vpternlog_v16i32_210_broadcast2:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0
+; CHECK-NEXT:    vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x2_scalar = load i32, i32* %ptr_x2
   %vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
@@ -652,8 +645,7 @@ define <16 x i32> @vpternlog_v16i32_012_
 ; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %x1scalar = load i32, i32* %x1ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
@@ -679,8 +671,7 @@ define <16 x i32> @vpternlog_v16i32_102_
 ; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_mask:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %x0scalar = load i32, i32* %x0ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
@@ -736,8 +727,7 @@ define <16 x i32> @vpternlog_v16i32_210_
 ; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_mask:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm0, %zmm2, %zmm1 {%k1}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %x1scalar = load i32, i32* %x1ptr
@@ -794,8 +784,7 @@ define <16 x i32> @vpternlog_v16i32_021_
 ; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_mask:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
 ; CHECK-NEXT:    retq
   %x2scalar = load i32, i32* %x2ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
@@ -808,8 +797,7 @@ define <16 x i32> @vpternlog_v16i32_012_
 ; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $9, %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x0scalar = load i32, i32* %x0ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
@@ -822,8 +810,7 @@ define <16 x i32> @vpternlog_v16i32_012_
 ; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x1scalar = load i32, i32* %x1ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
@@ -849,8 +836,7 @@ define <16 x i32> @vpternlog_v16i32_102_
 ; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x0scalar = load i32, i32* %x0ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
@@ -863,8 +849,7 @@ define <16 x i32> @vpternlog_v16i32_102_
 ; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $9, %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x1scalar = load i32, i32* %x1ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
@@ -903,9 +888,7 @@ define <16 x i32> @vpternlog_v16i32_210_
 ; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $9, %zmm0, %zmm1, %zmm2 {%k1} {z}
-; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm0
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x1scalar = load i32, i32* %x1ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
@@ -918,8 +901,7 @@ define <16 x i32> @vpternlog_v16i32_210_
 ; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x2scalar = load i32, i32* %x2ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
@@ -932,8 +914,7 @@ define <16 x i32> @vpternlog_v16i32_021_
 ; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x0scalar = load i32, i32* %x0ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
@@ -959,8 +940,7 @@ define <16 x i32> @vpternlog_v16i32_021_
 ; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_maskz:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    kmovw %esi, %k1
-; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm2
-; CHECK-NEXT:    vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1} {z}
+; CHECK-NEXT:    vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    retq
   %x2scalar = load i32, i32* %x2ptr
   %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0




More information about the llvm-commits mailing list