[llvm] r295673 - [AVX-512] Add a few more patterns for selecting masked vpternlog with broadcast loads where the passthru operand is not operand 0.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 20 09:44:09 PST 2017
Author: ctopper
Date: Mon Feb 20 11:44:09 2017
New Revision: 295673
URL: http://llvm.org/viewvc/llvm-project?rev=295673&view=rev
Log:
[AVX-512] Add a few more patterns for selecting masked vpternlog with broadcast loads where the passthru operand is not operand 0.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=295673&r1=295672&r2=295673&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Feb 20 11:44:09 2017
@@ -9095,6 +9095,31 @@ multiclass avx512_ternlog<bits<8> opc, s
_.RC:$src1)),
(!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2, _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ (i8 imm:$src4)), _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG213_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src2,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG231_imm8 imm:$src4))>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2, (i8 imm:$src4)),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
+ _.RC:$src2, addr:$src3, (VPTERNLOG312_imm8 imm:$src4))>;
}
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
Modified: llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll?rev=295673&r1=295672&r2=295673&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vpternlog-commute.ll Mon Feb 20 11:44:09 2017
@@ -1003,3 +1003,66 @@ define <16 x i32> @vpternlog_v16i32_021_
%res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x2, <16 x i32> %x1, i32 114, i16 %mask)
ret <16 x i32> %res
}
+
+define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask1(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
+; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpternlogd $92, (%rdi), %zmm1, %zmm0 {%k1}
+; CHECK-NEXT: retq
+ %x0scalar = load i32, i32* %x0ptr
+ %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
+ %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @vpternlog_v16i32_012_broadcast0_mask2(i32* %x0ptr, <16 x i32> %x1, <16 x i32> %x2, i16 %mask) {
+; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_mask2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpternlogd $58, (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %x0scalar = load i32, i32* %x0ptr
+ %vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
+ %x0 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask2(<16 x i32> %x0, i32* %x1ptr, <16 x i32> %x2, i16 %mask) {
+; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask2:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpternlogd $46, (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %x1scalar = load i32, i32* %x1ptr
+ %vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
+ %x1 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x2
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @vpternlog_v16i32_012_broadcast2_mask1(<16 x i32> %x0, <16 x i32> %x1, i32* %x2ptr, i16 %mask) {
+; CHECK-LABEL: vpternlog_v16i32_012_broadcast2_mask1:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpternlogd $78, (%rdi), %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %x2scalar = load i32, i32* %x2ptr
+ %vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
+ %x2 = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 114, i16 -1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %x1
+ ret <16 x i32> %res2
+}
More information about the llvm-commits
mailing list