[llvm] 9d1721c - [X86][SSE] Prefer PACKUS(AND(),AND()) to SHUFFLE(PSHUFB(),PSHUFB()) on pre-AVX2 targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 26 08:48:00 PDT 2020
Author: Simon Pilgrim
Date: 2020-03-26T15:47:43Z
New Revision: 9d1721ce39269387f56314a79b26581f99b236fb
URL: https://github.com/llvm/llvm-project/commit/9d1721ce39269387f56314a79b26581f99b236fb
DIFF: https://github.com/llvm/llvm-project/commit/9d1721ce39269387f56314a79b26581f99b236fb.diff
LOG: [X86][SSE] Prefer PACKUS(AND(),AND()) to SHUFFLE(PSHUFB(),PSHUFB()) on pre-AVX2 targets
As discussed on PR31443, we should prefer PACKUS for binary truncation patterns to reduce the number of shuffles.
The plan is to support AVX2+ targets once we've worked around PR45315: we currently fail to peek through a VBROADCAST_LOAD mask to recognise zero upper bits in a PACKUS pattern.
We should also be able to add support for v8i16 and possibly 256/512-bit vectors.
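For readers unfamiliar with the two instruction sequences: both compute the same binary truncation, packing two <8 x i16> inputs into one <16 x i8> result. The standalone sketch below is not part of the patch; it reproduces the old SHUFFLE(PSHUFB(),PSHUFB()) sequence and the new PACKUS(AND(),AND()) sequence with SSE intrinsics and checks both against a scalar reference (assumes an SSSE3-capable host, compile with e.g. -mssse3). The point of the change is that the PACKUS form needs no shuffle-control constant load per PSHUFB and no cross-lane unpack.

#include <immintrin.h>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Reference: scalar truncation of two <8 x i16> vectors to one <16 x i8>.
static void trunc_ref(const uint16_t *a, const uint16_t *b, uint8_t *out) {
  for (int i = 0; i < 8; ++i) out[i]     = (uint8_t)a[i];
  for (int i = 0; i < 8; ++i) out[i + 8] = (uint8_t)b[i];
}

// Old lowering: pshufb each half to its low bytes, then punpcklqdq them.
static __m128i trunc_pshufb(__m128i a, __m128i b) {
  const __m128i m = _mm_setr_epi8(0, 2, 4, 6, 8, 10, 12, 14,
                                  -1, -1, -1, -1, -1, -1, -1, -1);
  __m128i lo = _mm_shuffle_epi8(a, m); // low byte of each word of a in lanes 0..7
  __m128i hi = _mm_shuffle_epi8(b, m); // low byte of each word of b in lanes 0..7
  return _mm_unpacklo_epi64(lo, hi);   // punpcklqdq
}

// New lowering: mask each word to its low byte, then packuswb the two halves.
static __m128i trunc_packus(__m128i a, __m128i b) {
  const __m128i m = _mm_set1_epi16(0x00FF);
  return _mm_packus_epi16(_mm_and_si128(a, m), _mm_and_si128(b, m));
}

int main() {
  uint16_t a[8], b[8];
  for (int i = 0; i < 8; ++i) { a[i] = (uint16_t)(i * 4097); b[i] = (uint16_t)(0xBEEF - i); }
  uint8_t ref[16], out0[16], out1[16];
  trunc_ref(a, b, ref);
  __m128i va = _mm_loadu_si128((const __m128i *)a);
  __m128i vb = _mm_loadu_si128((const __m128i *)b);
  _mm_storeu_si128((__m128i *)out0, trunc_pshufb(va, vb));
  _mm_storeu_si128((__m128i *)out1, trunc_packus(va, vb));
  printf("pshufb sequence matches reference: %d\n", memcmp(ref, out0, 16) == 0);
  printf("packus sequence matches reference: %d\n", memcmp(ref, out1, 16) == 0);
  return 0;
}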
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/masked_store_trunc.ll
llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
llvm/test/CodeGen/X86/vector-trunc.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4e9f0a86924d..d0ee70deb2d5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -14817,6 +14817,10 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
Zeroable, Subtarget, DAG))
return V;
+ // Check for compaction patterns.
+ bool IsSingleInput = V2.isUndef();
+ int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput);
+
// Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
// with PSHUFB. It is important to do this before we attempt to generate any
// blends but after all of the single-input lowerings. If the single input
@@ -14827,10 +14831,16 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// and there are *very* few patterns that would actually be faster than the
// PSHUFB approach because of its ability to zero lanes.
//
+ // If the mask is a binary compaction, we can more efficiently perform this
+ // as a PACKUS(AND(),AND()) - which is quicker than UNPACK(PSHUFB(),PSHUFB()).
+ // TODO: AVX2+ sees a regression as they fail to see through VBROADCAST_LOAD
+ // masks.
+ //
// FIXME: The only exceptions to the above are blends which are exact
// interleavings with direct instructions supporting them. We currently don't
// handle those well here.
- if (Subtarget.hasSSSE3()) {
+ if (Subtarget.hasSSSE3() &&
+ (Subtarget.hasInt256() || IsSingleInput || NumEvenDrops != 1)) {
bool V1InUse = false;
bool V2InUse = false;
@@ -14888,8 +14898,7 @@ static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We special case these as they can be particularly efficiently handled with
// the PACKUSB instruction on x86 and they show up in common patterns of
// rearranging bytes to truncate wide elements.
- bool IsSingleInput = V2.isUndef();
- if (int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput)) {
+ if (NumEvenDrops) {
// NumEvenDrops is the power of two stride of the elements. Another way of
// thinking about it is that we need to drop the even elements this many
// times to get the original input.
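To make the new gating condition concrete: per the comment above, NumEvenDrops is the number of times the even elements must be dropped to recover the inputs, and the two-input, single-drop case is exactly the truncation-style mask that the patch now routes to PACKUS(AND(),AND()) on pre-AVX2 targets instead of PSHUFB. The snippet below is a simplified standalone illustration of that mask shape only; it is not the LLVM canLowerByDroppingEvenElements implementation, and the function name is invented for the example.

#include <array>
#include <cstdio>

// Simplified illustration (not the LLVM helper): returns true if a 16-element
// byte-shuffle mask is a single-level binary compaction, i.e. it keeps the
// even bytes of the 32-byte concatenation V1:V2. Undef lanes (-1) match
// anything. This corresponds to the NumEvenDrops == 1, !IsSingleInput case.
static bool isBinaryCompaction(const std::array<int, 16> &mask) {
  for (int i = 0; i < 16; ++i) {
    if (mask[i] < 0)
      continue;            // undef lane
    if (mask[i] != 2 * i)  // lane i must pick byte 2*i of V1:V2
      return false;
  }
  return true;
}

int main() {
  // A shuffle_v32i8_to_v16i8 style mask: <0,2,4,...,30> over two v16i8 inputs.
  std::array<int, 16> m;
  for (int i = 0; i < 16; ++i) m[i] = 2 * i;
  printf("binary compaction: %d\n", isBinaryCompaction(m));
  return 0;
}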
diff --git a/llvm/test/CodeGen/X86/masked_store_trunc.ll b/llvm/test/CodeGen/X86/masked_store_trunc.ll
index 2feb0382d8c9..3fecf6b8d4c3 100644
--- a/llvm/test/CodeGen/X86/masked_store_trunc.ll
+++ b/llvm/test/CodeGen/X86/masked_store_trunc.ll
@@ -4652,10 +4652,10 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
; SSE4-LABEL: truncstore_v32i16_v32i8:
; SSE4: # %bb.0:
; SSE4-NEXT: pxor %xmm7, %xmm7
-; SSE4-NEXT: movdqa {{.*#+}} xmm6 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE4-NEXT: pshufb %xmm6, %xmm1
-; SSE4-NEXT: pshufb %xmm6, %xmm0
-; SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE4-NEXT: movdqa {{.*#+}} xmm6 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE4-NEXT: pand %xmm6, %xmm1
+; SSE4-NEXT: pand %xmm6, %xmm0
+; SSE4-NEXT: packuswb %xmm1, %xmm0
; SSE4-NEXT: pcmpeqb %xmm7, %xmm4
; SSE4-NEXT: pmovmskb %xmm4, %ecx
; SSE4-NEXT: xorl $65535, %ecx # imm = 0xFFFF
@@ -4711,14 +4711,14 @@ define void @truncstore_v32i16_v32i8(<32 x i16> %x, <32 x i8>* %p, <32 x i8> %ma
; SSE4-NEXT: .LBB15_29: # %cond.store27
; SSE4-NEXT: pextrb $14, %xmm0, 14(%rdi)
; SSE4-NEXT: .LBB15_30: # %else28
-; SSE4-NEXT: pshufb %xmm6, %xmm3
-; SSE4-NEXT: pshufb %xmm6, %xmm2
+; SSE4-NEXT: pand %xmm6, %xmm3
+; SSE4-NEXT: pand %xmm6, %xmm2
; SSE4-NEXT: testl $32768, %eax # imm = 0x8000
; SSE4-NEXT: je .LBB15_32
; SSE4-NEXT: # %bb.31: # %cond.store29
; SSE4-NEXT: pextrb $15, %xmm0, 15(%rdi)
; SSE4-NEXT: .LBB15_32: # %else30
-; SSE4-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
+; SSE4-NEXT: packuswb %xmm3, %xmm2
; SSE4-NEXT: testl $65536, %eax # imm = 0x10000
; SSE4-NEXT: jne .LBB15_33
; SSE4-NEXT: # %bb.34: # %else32
@@ -5750,10 +5750,10 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma
; SSE4-LABEL: truncstore_v16i16_v16i8:
; SSE4: # %bb.0:
; SSE4-NEXT: pxor %xmm3, %xmm3
-; SSE4-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE4-NEXT: pshufb %xmm4, %xmm1
-; SSE4-NEXT: pshufb %xmm4, %xmm0
-; SSE4-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; SSE4-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE4-NEXT: pand %xmm4, %xmm1
+; SSE4-NEXT: pand %xmm4, %xmm0
+; SSE4-NEXT: packuswb %xmm1, %xmm0
; SSE4-NEXT: pcmpeqb %xmm2, %xmm3
; SSE4-NEXT: pmovmskb %xmm3, %eax
; SSE4-NEXT: xorl $65535, %eax # imm = 0xFFFF
diff --git a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
index e0545c8a584d..7317f424a4d7 100644
--- a/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
+++ b/llvm/test/CodeGen/X86/shuffle-vs-trunc-256.ll
@@ -13,16 +13,25 @@
; Ideally, the shuffles should be lowered to code with the same quality as the truncates.
define void @shuffle_v32i8_to_v16i8(<32 x i8>* %L, <16 x i8>* %S) nounwind {
-; AVX-LABEL: shuffle_v32i8_to_v16i8:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovdqa (%rdi), %xmm0
-; AVX-NEXT: vmovdqa 16(%rdi), %xmm1
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: vmovdqa %xmm0, (%rsi)
-; AVX-NEXT: retq
+; AVX1-LABEL: shuffle_v32i8_to_v16i8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm0 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpand 16(%rdi), %xmm0, %xmm1
+; AVX1-NEXT: vpand (%rdi), %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vmovdqa %xmm0, (%rsi)
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: shuffle_v32i8_to_v16i8:
+; AVX2: # %bb.0:
+; AVX2-NEXT: vmovdqa (%rdi), %xmm0
+; AVX2-NEXT: vmovdqa 16(%rdi), %xmm1
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: vmovdqa %xmm0, (%rsi)
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: shuffle_v32i8_to_v16i8:
; AVX512F: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
index 3457450a3ee0..a4af6c1eb1e0 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-and-bool.ll
@@ -356,29 +356,17 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
}
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
-; SSE2-LABEL: trunc_v16i16_v16i1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: psllw $7, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: cmpw $-1, %ax
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: trunc_v16i16_v16i1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: cmpw $-1, %ax
-; SSE41-NEXT: sete %al
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc_v16i16_v16i1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: sete %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v16i16_v16i1:
; AVX1: # %bb.0:
@@ -695,37 +683,21 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
}
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
-; SSE2-LABEL: trunc_v32i16_v32i1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm4, %xmm3
-; SSE2-NEXT: pand %xmm4, %xmm2
-; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: psllw $7, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: cmpw $-1, %ax
-; SSE2-NEXT: sete %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: trunc_v32i16_v32i1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm4, %xmm3
-; SSE41-NEXT: pshufb %xmm4, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: pshufb %xmm4, %xmm1
-; SSE41-NEXT: pshufb %xmm4, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: cmpw $-1, %ax
-; SSE41-NEXT: sete %al
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc_v32i16_v32i1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: cmpw $-1, %ax
+; SSE-NEXT: sete %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v32i16_v32i1:
; AVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
index 5a0deab79ae9..025dbcb3ff63 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-or-bool.ll
@@ -350,29 +350,17 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
}
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
-; SSE2-LABEL: trunc_v16i16_v16i1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: psllw $7, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testw %ax, %ax
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: trunc_v16i16_v16i1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testw %ax, %ax
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc_v16i16_v16i1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v16i16_v16i1:
; AVX1: # %bb.0:
@@ -689,37 +677,21 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
}
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
-; SSE2-LABEL: trunc_v32i16_v32i1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm4, %xmm3
-; SSE2-NEXT: pand %xmm4, %xmm2
-; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: por %xmm2, %xmm0
-; SSE2-NEXT: psllw $7, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: testw %ax, %ax
-; SSE2-NEXT: setne %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: trunc_v32i16_v32i1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm4, %xmm3
-; SSE41-NEXT: pshufb %xmm4, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: pshufb %xmm4, %xmm1
-; SSE41-NEXT: pshufb %xmm4, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: por %xmm2, %xmm0
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: testw %ax, %ax
-; SSE41-NEXT: setne %al
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc_v32i16_v32i1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: testw %ax, %ax
+; SSE-NEXT: setne %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v32i16_v32i1:
; AVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
index 8014f1f41516..43b6ef57d46f 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll
@@ -365,33 +365,19 @@ define i1 @trunc_v8i32_v8i1(<8 x i32>) {
}
define i1 @trunc_v16i16_v16i1(<16 x i16>) {
-; SSE2-LABEL: trunc_v16i16_v16i1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: psllw $7, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $8, %ecx
-; SSE2-NEXT: xorb %al, %cl
-; SSE2-NEXT: setnp %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: trunc_v16i16_v16i1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: shrl $8, %ecx
-; SSE41-NEXT: xorb %al, %cl
-; SSE41-NEXT: setnp %al
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc_v16i16_v16i1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: shrl $8, %ecx
+; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: setnp %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v16i16_v16i1:
; AVX1: # %bb.0:
@@ -775,41 +761,23 @@ define i1 @trunc_v16i32_v16i1(<16 x i32>) {
}
define i1 @trunc_v32i16_v32i1(<32 x i16>) {
-; SSE2-LABEL: trunc_v32i16_v32i1:
-; SSE2: # %bb.0:
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm4, %xmm3
-; SSE2-NEXT: pand %xmm4, %xmm2
-; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: pxor %xmm2, %xmm0
-; SSE2-NEXT: psllw $7, %xmm0
-; SSE2-NEXT: pmovmskb %xmm0, %eax
-; SSE2-NEXT: movl %eax, %ecx
-; SSE2-NEXT: shrl $8, %ecx
-; SSE2-NEXT: xorb %al, %cl
-; SSE2-NEXT: setnp %al
-; SSE2-NEXT: retq
-;
-; SSE41-LABEL: trunc_v32i16_v32i1:
-; SSE41: # %bb.0:
-; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm4, %xmm3
-; SSE41-NEXT: pshufb %xmm4, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: pshufb %xmm4, %xmm1
-; SSE41-NEXT: pshufb %xmm4, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: pxor %xmm2, %xmm0
-; SSE41-NEXT: psllw $7, %xmm0
-; SSE41-NEXT: pmovmskb %xmm0, %eax
-; SSE41-NEXT: movl %eax, %ecx
-; SSE41-NEXT: shrl $8, %ecx
-; SSE41-NEXT: xorb %al, %cl
-; SSE41-NEXT: setnp %al
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc_v32i16_v32i1:
+; SSE: # %bb.0:
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: psllw $7, %xmm0
+; SSE-NEXT: pmovmskb %xmm0, %eax
+; SSE-NEXT: movl %eax, %ecx
+; SSE-NEXT: shrl $8, %ecx
+; SSE-NEXT: xorb %al, %cl
+; SSE-NEXT: setnp %al
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc_v32i16_v32i1:
; AVX1: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
index f3b35423bd7a..5608917cc84c 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll
@@ -1969,36 +1969,20 @@ define <16 x i8> @shuffle_v16i8_02_03_04_05_06_07_00_01_10_11_12_13_14_15_08_09(
}
define <16 x i8> @PR12412(<16 x i8> %inval1, <16 x i8> %inval2) {
-; SSE2-LABEL: PR12412:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: PR12412:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm2, %xmm1
-; SSSE3-NEXT: pshufb %xmm2, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: PR12412:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: retq
+; SSE-LABEL: PR12412:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX1-LABEL: PR12412:
; AVX1: # %bb.0: # %entry
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: PR12412:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
index 3ad9ff10f2ba..475576687741 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v32.ll
@@ -4753,14 +4753,14 @@ define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_
; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_32_34_36_38_40_42_44_46_16_18_20_22_24_26_28_30_48_50_52_54_56_58_60_62:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
-; AVX1-NEXT: vpshufb %xmm3, %xmm4, %xmm4
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm4[0],xmm2[0]
-; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: vpand %xmm3, %xmm4, %xmm4
+; AVX1-NEXT: vpackuswb %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
@@ -4808,14 +4808,14 @@ define <32 x i8> @shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_
; AVX1-LABEL: shuffle_v32i8_00_02_04_06_08_10_12_14_16_18_20_22_24_26_28_30_32_34_36_38_40_42_44_46_48_50_52_54_56_58_60_62:
; AVX1: # %bb.0:
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpshufb %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm3 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpshufb %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; AVX1-NEXT: vpand %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpand %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll
index 87235ed9c69d..0e42991c3b2e 100644
--- a/llvm/test/CodeGen/X86/vector-trunc.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc.ll
@@ -1026,32 +1026,14 @@ entry:
;PR25684
define void @trunc16i16_16i8(<16 x i16> %a) {
-; SSE2-LABEL: trunc16i16_16i8:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: movdqu %xmm0, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc16i16_16i8:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm2, %xmm1
-; SSSE3-NEXT: pshufb %xmm2, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT: movdqu %xmm0, (%rax)
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc16i16_16i8:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: movdqu %xmm0, (%rax)
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc16i16_16i8:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc16i16_16i8:
; AVX1: # %bb.0: # %entry
@@ -1235,44 +1217,18 @@ entry:
}
define void @trunc32i16_32i8(<32 x i16> %a) {
-; SSE2-LABEL: trunc32i16_32i8:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm4, %xmm1
-; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: pand %xmm4, %xmm3
-; SSE2-NEXT: pand %xmm4, %xmm2
-; SSE2-NEXT: packuswb %xmm3, %xmm2
-; SSE2-NEXT: movdqu %xmm2, (%rax)
-; SSE2-NEXT: movdqu %xmm0, (%rax)
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc32i16_32i8:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm4, %xmm1
-; SSSE3-NEXT: pshufb %xmm4, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT: pshufb %xmm4, %xmm3
-; SSSE3-NEXT: pshufb %xmm4, %xmm2
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSSE3-NEXT: movdqu %xmm2, (%rax)
-; SSSE3-NEXT: movdqu %xmm0, (%rax)
-; SSSE3-NEXT: retq
-;
-; SSE41-LABEL: trunc32i16_32i8:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm4, %xmm1
-; SSE41-NEXT: pshufb %xmm4, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: pshufb %xmm4, %xmm3
-; SSE41-NEXT: pshufb %xmm4, %xmm2
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
-; SSE41-NEXT: movdqu %xmm2, (%rax)
-; SSE41-NEXT: movdqu %xmm0, (%rax)
-; SSE41-NEXT: retq
+; SSE-LABEL: trunc32i16_32i8:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm4, %xmm1
+; SSE-NEXT: pand %xmm4, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: pand %xmm4, %xmm3
+; SSE-NEXT: pand %xmm4, %xmm2
+; SSE-NEXT: packuswb %xmm3, %xmm2
+; SSE-NEXT: movdqu %xmm2, (%rax)
+; SSE-NEXT: movdqu %xmm0, (%rax)
+; SSE-NEXT: retq
;
; AVX1-LABEL: trunc32i16_32i8:
; AVX1: # %bb.0: # %entry
@@ -1726,37 +1682,29 @@ entry:
}
define <16 x i8> @trunc2x8i16_16i8(<8 x i16> %a, <8 x i16> %b) {
-; SSE2-LABEL: trunc2x8i16_16i8:
-; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
-; SSE2-NEXT: pand %xmm2, %xmm1
-; SSE2-NEXT: pand %xmm2, %xmm0
-; SSE2-NEXT: packuswb %xmm1, %xmm0
-; SSE2-NEXT: retq
-;
-; SSSE3-LABEL: trunc2x8i16_16i8:
-; SSSE3: # %bb.0: # %entry
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSSE3-NEXT: pshufb %xmm2, %xmm1
-; SSSE3-NEXT: pshufb %xmm2, %xmm0
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSSE3-NEXT: retq
+; SSE-LABEL: trunc2x8i16_16i8:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: packuswb %xmm1, %xmm0
+; SSE-NEXT: retq
;
-; SSE41-LABEL: trunc2x8i16_16i8:
-; SSE41: # %bb.0: # %entry
-; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: pshufb %xmm2, %xmm0
-; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE41-NEXT: retq
+; AVX1-LABEL: trunc2x8i16_16i8:
+; AVX1: # %bb.0: # %entry
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
+; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
;
-; AVX-LABEL: trunc2x8i16_16i8:
-; AVX: # %bb.0: # %entry
-; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
-; AVX-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vpshufb %xmm2, %xmm0, %xmm0
-; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: retq
+; AVX2-LABEL: trunc2x8i16_16i8:
+; AVX2: # %bb.0: # %entry
+; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
+; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX2-NEXT: retq
;
; AVX512F-LABEL: trunc2x8i16_16i8:
; AVX512F: # %bb.0: # %entry