[llvm] r373156 - [X86] Move bitselect matching to vpternlog into X86ISelDAGToDAG.cpp
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sat Sep 28 18:24:30 PDT 2019
Author: ctopper
Date: Sat Sep 28 18:24:29 2019
New Revision: 373156
URL: http://llvm.org/viewvc/llvm-project?rev=373156&view=rev
Log:
[X86] Move bitselect matching to vpternlog into X86ISelDAGToDAG.cpp
This allows us to reduce the use count on the condition node before
the match. This enables load folding for that operand without
relying on the peephole pass. This will be improved on for
broadcast load folding in a subsequent commit.
This still requires a bunch of isel patterns for vXi16/vXi8 types
though.
Modified:
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/combine-bitselect.ll
llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll
llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll
llvm/trunk/test/CodeGen/X86/vector-fshl-256.ll
llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll
llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll
llvm/trunk/test/CodeGen/X86/vector-fshl-rot-256.ll
llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll
llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll
llvm/trunk/test/CodeGen/X86/vector-fshr-256.ll
llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll
llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll
llvm/trunk/test/CodeGen/X86/vector-fshr-rot-256.ll
llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll
llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Sat Sep 28 18:24:29 2019
@@ -510,6 +510,7 @@ namespace {
bool combineIncDecVector(SDNode *Node);
bool tryShrinkShlLogicImm(SDNode *N);
bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask);
+ bool tryMatchBitSelect(SDNode *N);
MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
const SDLoc &dl, MVT VT, SDNode *Node);
@@ -4275,6 +4276,55 @@ bool X86DAGToDAGISel::tryVPTESTM(SDNode
return true;
}
+// Try to match the bitselect pattern (or (and A, B), (andn A, C)). Turn it
+// into vpternlog.
+bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) {
+ assert(N->getOpcode() == ISD::OR && "Unexpected opcode!");
+
+ MVT NVT = N->getSimpleValueType(0);
+
+ // Make sure we support VPTERNLOG.
+ if (!NVT.isVector() || !Subtarget->hasAVX512())
+ return false;
+
+ // We need VLX for 128/256-bit.
+ if (!(Subtarget->hasVLX() || NVT.is512BitVector()))
+ return false;
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Canonicalize AND to LHS.
+ if (N1.getOpcode() == ISD::AND)
+ std::swap(N0, N1);
+
+ if (N0.getOpcode() != ISD::AND ||
+ N1.getOpcode() != X86ISD::ANDNP ||
+ !N0.hasOneUse() || !N1.hasOneUse())
+ return false;
+
+ // ANDN is not commutable, so use it to pin down A and C.
+ SDValue A = N1.getOperand(0);
+ SDValue C = N1.getOperand(1);
+
+ // AND is commutable: if one operand matches A, the other operand is B.
+ // Otherwise this isn't a match.
+ SDValue B;
+ if (N0.getOperand(0) == A)
+ B = N0.getOperand(1);
+ else if (N0.getOperand(1) == A)
+ B = N0.getOperand(0);
+ else
+ return false;
+
+ SDLoc dl(N);
+ SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8);
+ SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm);
+ ReplaceNode(N, Ternlog.getNode());
+ SelectCode(Ternlog.getNode());
+ return true;
+}
+
void X86DAGToDAGISel::Select(SDNode *Node) {
MVT NVT = Node->getSimpleValueType(0);
unsigned Opcode = Node->getOpcode();
@@ -4433,6 +4483,9 @@ void X86DAGToDAGISel::Select(SDNode *Nod
if (tryShrinkShlLogicImm(Node))
return;
+ if (Opcode == ISD::OR && tryMatchBitSelect(Node))
+ return;
+
LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB: {
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sat Sep 28 18:24:29 2019
@@ -11436,6 +11436,113 @@ defm VPTERNLOGD : avx512_common_ternlog<
defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", SchedWriteVecALU,
avx512vl_i64_info>, VEX_W;
+// Patterns to use VPTERNLOG for vXi16/vXi8 vectors.
+let Predicates = [HasVLX] in {
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (loadv16i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i8 (X86vpternlog (loadv16i8 addr:$src3), VR128X:$src2,
+ VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i8 (X86vpternlog VR128X:$src1, (loadv16i8 addr:$src3),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, VR128X:$src2,
+ (loadv8i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v8i16 (X86vpternlog (loadv8i16 addr:$src3), VR128X:$src2,
+ VR128X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v8i16 (X86vpternlog VR128X:$src1, (loadv8i16 addr:$src3),
+ VR128X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ128rmi VR128X:$src1, VR128X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (loadv32i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i8 (X86vpternlog (loadv32i8 addr:$src3), VR256X:$src2,
+ VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i8 (X86vpternlog VR256X:$src1, (loadv32i8 addr:$src3),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, VR256X:$src2,
+ (loadv16i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v16i16 (X86vpternlog (loadv16i16 addr:$src3), VR256X:$src2,
+ VR256X:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v16i16 (X86vpternlog VR256X:$src1, (loadv16i16 addr:$src3),
+ VR256X:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZ256rmi VR256X:$src1, VR256X:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
+let Predicates = [HasAVX512] in {
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, VR512:$src2,
+ (loadv64i8 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v64i8 (X86vpternlog (loadv64i8 addr:$src3), VR512:$src2,
+ VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v64i8 (X86vpternlog VR512:$src1, (loadv64i8 addr:$src3),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2, VR512:$src3,
+ (i8 timm:$src4))),
+ (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, VR512:$src2,
+ (loadv32i16 addr:$src3), (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ timm:$src4)>;
+ def : Pat<(v32i16 (X86vpternlog (loadv32i16 addr:$src3), VR512:$src2,
+ VR512:$src1, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG321_imm8 timm:$src4))>;
+ def : Pat<(v32i16 (X86vpternlog VR512:$src1, (loadv32i16 addr:$src3),
+ VR512:$src2, (i8 timm:$src4))),
+ (VPTERNLOGQZrmi VR512:$src1, VR512:$src2, addr:$src3,
+ (VPTERNLOG132_imm8 timm:$src4))>;
+}
+
// Patterns to implement vnot using vpternlog instead of creating all ones
// using pcmpeq or vpternlog and then xoring with that. The value 15 is chosen
// so that the result is only dependent on src0. But we use the same source
@@ -11533,49 +11640,6 @@ let Predicates = [HasVLX] in {
(VPTERNLOGQZ256rri VR256X:$src, VR256X:$src, VR256X:$src, (i8 15))>;
}
-let Predicates = [HasVLX] in {
- def : Pat<(v16i8 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
- def : Pat<(v8i16 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
- def : Pat<(v4i32 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
- def : Pat<(v2i64 (or (and VR128X:$src1, VR128X:$src2),
- (X86andnp VR128X:$src1, VR128X:$src3))),
- (VPTERNLOGQZ128rri VR128X:$src1, VR128X:$src2, VR128X:$src3, (i8 202))>;
-
- def : Pat<(v32i8 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
- def : Pat<(v16i16 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
- def : Pat<(v8i32 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
- def : Pat<(v4i64 (or (and VR256X:$src1, VR256X:$src2),
- (X86andnp VR256X:$src1, VR256X:$src3))),
- (VPTERNLOGQZ256rri VR256X:$src1, VR256X:$src2, VR256X:$src3, (i8 202))>;
-}
-
-let Predicates = [HasAVX512] in {
- def : Pat<(v64i8 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
- def : Pat<(v32i16 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
- def : Pat<(v16i32 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
- def : Pat<(v8i64 (or (and VR512:$src1, VR512:$src2),
- (X86andnp VR512:$src1, VR512:$src3))),
- (VPTERNLOGQZrri VR512:$src1, VR512:$src2, VR512:$src3, (i8 202))>;
-}
-
//===----------------------------------------------------------------------===//
// AVX-512 - FixupImm
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/test/CodeGen/X86/combine-bitselect.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/combine-bitselect.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/combine-bitselect.ll (original)
+++ llvm/trunk/test/CodeGen/X86/combine-bitselect.ll Sat Sep 28 18:24:29 2019
@@ -548,9 +548,9 @@ define <8 x i64> @bitselect_v8i64_mm(<8
;
; AVX512F-LABEL: bitselect_v8i64_mm:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vmovdqa64 (%rdi), %zmm1
-; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm0
-; AVX512F-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512F-NEXT: vmovdqa64 (%rsi), %zmm1
+; AVX512F-NEXT: vmovdqa64 {{.*#+}} zmm0 = [18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022,18446744073709551612,18446744065119617022]
+; AVX512F-NEXT: vpternlogq $202, (%rdi), %zmm1, %zmm0
; AVX512F-NEXT: retq
%3 = load <8 x i64>, <8 x i64>* %0
%4 = load <8 x i64>, <8 x i64>* %1
Modified: llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vec-copysign-avx512.ll Sat Sep 28 18:24:29 2019
@@ -6,7 +6,7 @@ define <4 x float> @v4f32(<4 x float> %a
; CHECK-LABEL: v4f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpternlogq $226, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: vpternlogd $226, %xmm1, %xmm2, %xmm0
; CHECK-NEXT: retq
%tmp = tail call <4 x float> @llvm.copysign.v4f32( <4 x float> %a, <4 x float> %b )
ret <4 x float> %tmp
@@ -16,7 +16,7 @@ define <8 x float> @v8f32(<8 x float> %a
; CHECK-LABEL: v8f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpternlogq $226, %ymm1, %ymm2, %ymm0
+; CHECK-NEXT: vpternlogd $226, %ymm1, %ymm2, %ymm0
; CHECK-NEXT: retq
%tmp = tail call <8 x float> @llvm.copysign.v8f32( <8 x float> %a, <8 x float> %b )
ret <8 x float> %tmp
@@ -26,7 +26,7 @@ define <16 x float> @v16f32(<16 x float>
; CHECK-LABEL: v16f32:
; CHECK: ## %bb.0:
; CHECK-NEXT: vpbroadcastd {{.*#+}} zmm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
-; CHECK-NEXT: vpternlogq $226, %zmm1, %zmm2, %zmm0
+; CHECK-NEXT: vpternlogd $226, %zmm1, %zmm2, %zmm0
; CHECK-NEXT: retq
%tmp = tail call <16 x float> @llvm.copysign.v16f32( <16 x float> %a, <16 x float> %b )
ret <16 x float> %tmp
Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-128.ll Sat Sep 28 18:24:29 2019
@@ -2991,9 +2991,9 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -3016,16 +3016,16 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-256.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-256.ll Sat Sep 28 18:24:29 2019
@@ -2514,9 +2514,9 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -2539,16 +2539,16 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-512.ll Sat Sep 28 18:24:29 2019
@@ -1559,30 +1559,30 @@ define <64 x i8> @splatconstant_funnnel_
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-rot-128.ll Sat Sep 28 18:24:29 2019
@@ -1846,9 +1846,9 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -1862,9 +1862,9 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-rot-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-rot-256.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-rot-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-rot-256.ll Sat Sep 28 18:24:29 2019
@@ -436,14 +436,14 @@ define <32 x i8> @var_funnnel_v32i8(<32
;
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -1504,9 +1504,9 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -1520,9 +1520,9 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshl-rot-512.ll Sat Sep 28 18:24:29 2019
@@ -829,16 +829,16 @@ define <64 x i8> @splatconstant_funnnel_
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-128.ll Sat Sep 28 18:24:29 2019
@@ -3012,9 +3012,9 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VL-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -3037,16 +3037,16 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %xmm0, %xmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %xmm1, %xmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm2, %xmm0
; AVX512VLVBMI2-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-256.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-256.ll Sat Sep 28 18:24:29 2019
@@ -2515,9 +2515,9 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -2540,16 +2540,16 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %ymm1, %ymm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm0
; AVX512VLVBMI2-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-512.ll Sat Sep 28 18:24:29 2019
@@ -1543,30 +1543,30 @@ define <64 x i8> @splatconstant_funnnel_
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512BW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VBMI2: # %bb.0:
-; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VBMI2-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLBW-NEXT: retq
;
; AVX512VLVBMI2-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLVBMI2: # %bb.0:
-; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm1
-; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLVBMI2-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpsllw $4, %zmm0, %zmm2
+; AVX512VLVBMI2-NEXT: vpsrlw $4, %zmm1, %zmm0
+; AVX512VLVBMI2-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm2, %zmm0
; AVX512VLVBMI2-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-rot-128.ll Sat Sep 28 18:24:29 2019
@@ -1928,9 +1928,9 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v16i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v16i8:
@@ -1944,9 +1944,9 @@ define <16 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v16i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm1
-; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %xmm1, %xmm0
; AVX512VLBW-NEXT: retq
;
; XOP-LABEL: splatconstant_funnnel_v16i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-rot-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-rot-256.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-rot-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-rot-256.ll Sat Sep 28 18:24:29 2019
@@ -475,16 +475,16 @@ define <32 x i8> @var_funnnel_v32i8(<32
;
; AVX512VL-LABEL: var_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX512VL-NEXT: vpsubb %ymm1, %ymm2, %ymm1
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -1582,9 +1582,9 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VL-LABEL: splatconstant_funnnel_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_funnnel_v32i8:
@@ -1598,9 +1598,9 @@ define <32 x i8> @splatconstant_funnnel_
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v32i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VLBW-NEXT: retq
;
; XOPAVX1-LABEL: splatconstant_funnnel_v32i8:
Modified: llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-fshr-rot-512.ll Sat Sep 28 18:24:29 2019
@@ -849,16 +849,16 @@ define <64 x i8> @splatconstant_funnnel_
;
; AVX512BW-LABEL: splatconstant_funnnel_v64i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512BW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512BW-NEXT: retq
;
; AVX512VLBW-LABEL: splatconstant_funnnel_v64i8:
; AVX512VLBW: # %bb.0:
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $228, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpternlogq $216, {{.*}}(%rip), %zmm1, %zmm0
; AVX512VLBW-NEXT: retq
%res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>)
ret <64 x i8> %res
Modified: llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll?rev=373156&r1=373155&r2=373156&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-rotate-256.ll Sat Sep 28 18:24:29 2019
@@ -432,14 +432,14 @@ define <32 x i8> @var_rotate_v32i8(<32 x
;
; AVX512VL-LABEL: var_rotate_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm2
-; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm3
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm2, %ymm3
+; AVX512VL-NEXT: vpsllw $2, %ymm0, %ymm2
+; AVX512VL-NEXT: vpsrlw $6, %ymm0, %ymm3
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3
; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0
; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2
@@ -1505,9 +1505,9 @@ define <32 x i8> @splatconstant_rotate_v
;
; AVX512VL-LABEL: splatconstant_rotate_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatconstant_rotate_v32i8:
@@ -1787,9 +1787,9 @@ define <32 x i8> @splatconstant_rotate_m
;
; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm1
-; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $228, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0
+; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0
; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
; AVX512VL-NEXT: retq
;
More information about the llvm-commits mailing list