[llvm] 38bfe9a - [DAG] combineVSelectWithAllOnesOrZeros - missing freeze (#150388)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 4 07:55:16 PDT 2025
Author: woruyu
Date: 2025-08-04T15:55:12+01:00
New Revision: 38bfe9ae56e5bd35d68a178e99453fc0a1fbefda
URL: https://github.com/llvm/llvm-project/commit/38bfe9ae56e5bd35d68a178e99453fc0a1fbefda
DIFF: https://github.com/llvm/llvm-project/commit/38bfe9ae56e5bd35d68a178e99453fc0a1fbefda.diff
LOG: [DAG] combineVSelectWithAllOnesOrZeros - missing freeze (#150388)
This PR resolves https://github.com/llvm/llvm-project/issues/150069
---------
Co-authored-by: Simon Pilgrim <llvm-dev at redking.me.uk>
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/AArch64/vselect-ext.ll
llvm/test/CodeGen/X86/avg-mask.ll
llvm/test/CodeGen/X86/avx512-ext.ll
llvm/test/CodeGen/X86/pr78897.ll
llvm/test/CodeGen/X86/sqrt-fastmath.ll
llvm/test/CodeGen/X86/ushl_sat_vec.ll
llvm/test/CodeGen/X86/var-permute-128.ll
llvm/test/CodeGen/X86/vector-bo-select.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index decec1c570bb9..c008135854621 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13186,14 +13186,14 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
// select Cond, -1, x → or Cond, x
if (IsTAllOne) {
- SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, Or);
}
// select Cond, x, 0 → and Cond, x
if (IsFAllZero) {
- SDValue X = DAG.getBitcast(CondVT, TVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(TVal));
SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, X);
return DAG.getBitcast(VT, And);
}
@@ -13201,7 +13201,7 @@ static SDValue combineVSelectWithAllOnesOrZeros(SDValue Cond, SDValue TVal,
// select Cond, 0, x -> and not(Cond), x
if (IsTAllZero &&
(isBitwiseNot(peekThroughBitcasts(Cond)) || TLI.hasAndNot(Cond))) {
- SDValue X = DAG.getBitcast(CondVT, FVal);
+ SDValue X = DAG.getBitcast(CondVT, DAG.getFreeze(FVal));
SDValue And =
DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT), X);
return DAG.getBitcast(VT, And);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ce4c061725f7b..2825037e71edc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9132,11 +9132,17 @@ LowerBUILD_VECTORAsVariablePermute(SDValue V, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
SDValue SrcVec, IndicesVec;
+
+ auto PeekThroughFreeze = [](SDValue N) {
+ if (N->getOpcode() == ISD::FREEZE && N.hasOneUse())
+ return N->getOperand(0);
+ return N;
+ };
// Check for a match of the permute source vector and permute index elements.
// This is done by checking that the i-th build_vector operand is of the form:
// (extract_elt SrcVec, (extract_elt IndicesVec, i)).
for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) {
- SDValue Op = V.getOperand(Idx);
+ SDValue Op = PeekThroughFreeze(V.getOperand(Idx));
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll
index 4f2b9c5a62669..9fe8c50640981 100644
--- a/llvm/test/CodeGen/AArch64/vselect-ext.ll
+++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll
@@ -175,12 +175,12 @@ define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i1
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.8h v0, #128, lsl #8
; CHECK-NEXT: movi.4s v1, #10
-; CHECK-NEXT: ushll2.4s v2, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: cmhi.4s v3, v0, v1
-; CHECK-NEXT: cmhi.4s v1, v2, v1
-; CHECK-NEXT: and.16b v1, v1, v2
-; CHECK-NEXT: and.16b v0, v3, v0
+; CHECK-NEXT: ushll.4s v2, v0, #0
+; CHECK-NEXT: ushll2.4s v0, v0, #0
+; CHECK-NEXT: cmhi.4s v3, v2, v1
+; CHECK-NEXT: cmhi.4s v1, v0, v1
+; CHECK-NEXT: and.16b v1, v1, v0
+; CHECK-NEXT: and.16b v0, v3, v2
; CHECK-NEXT: ret
%ext = zext <8 x i15> %a to <8 x i32>
%cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
@@ -289,12 +289,12 @@ define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13>
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.8h v0, #224, lsl #8
; CHECK-NEXT: movi.4s v1, #10
-; CHECK-NEXT: ushll2.4s v2, v0, #0
-; CHECK-NEXT: ushll.4s v0, v0, #0
-; CHECK-NEXT: cmeq.4s v3, v0, v1
-; CHECK-NEXT: cmeq.4s v1, v2, v1
-; CHECK-NEXT: and.16b v1, v1, v2
-; CHECK-NEXT: and.16b v0, v3, v0
+; CHECK-NEXT: ushll.4s v2, v0, #0
+; CHECK-NEXT: ushll2.4s v0, v0, #0
+; CHECK-NEXT: cmeq.4s v3, v2, v1
+; CHECK-NEXT: cmeq.4s v1, v0, v1
+; CHECK-NEXT: and.16b v1, v1, v0
+; CHECK-NEXT: and.16b v0, v3, v2
; CHECK-NEXT: ret
%ext = zext <8 x i13> %a to <8 x i32>
%cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
@@ -429,17 +429,17 @@ define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ushll.4s v2, v0, #0
-; CHECK-NEXT: ushll2.4s v0, v0, #0
+; CHECK-NEXT: ushll2.4s v2, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v0, v0, #19
; CHECK-NEXT: shl.4s v2, v2, #19
; CHECK-NEXT: sshr.4s v0, v0, #19
; CHECK-NEXT: sshr.4s v2, v2, #19
-; CHECK-NEXT: cmeq.4s v3, v2, v1
-; CHECK-NEXT: cmeq.4s v1, v0, v1
-; CHECK-NEXT: and.16b v1, v1, v0
-; CHECK-NEXT: and.16b v0, v3, v2
+; CHECK-NEXT: cmeq.4s v3, v0, v1
+; CHECK-NEXT: cmeq.4s v1, v2, v1
+; CHECK-NEXT: and.16b v1, v1, v2
+; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: ret
%ext = sext <8 x i13> %a to <8 x i32>
%cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
@@ -493,17 +493,17 @@ entry:
define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
; CHECK: ; %bb.0:
-; CHECK-NEXT: ushll.4s v2, v0, #0
-; CHECK-NEXT: ushll2.4s v0, v0, #0
+; CHECK-NEXT: ushll2.4s v2, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v0, v0, #17
; CHECK-NEXT: shl.4s v2, v2, #17
; CHECK-NEXT: sshr.4s v0, v0, #17
; CHECK-NEXT: sshr.4s v2, v2, #17
-; CHECK-NEXT: cmge.4s v3, v2, v1
-; CHECK-NEXT: cmge.4s v1, v0, v1
-; CHECK-NEXT: and.16b v1, v1, v0
-; CHECK-NEXT: and.16b v0, v3, v2
+; CHECK-NEXT: cmge.4s v3, v0, v1
+; CHECK-NEXT: cmge.4s v1, v2, v1
+; CHECK-NEXT: and.16b v1, v1, v2
+; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: ret
%ext = sext <8 x i15> %a to <8 x i32>
%cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
diff --git a/llvm/test/CodeGen/X86/avg-mask.ll b/llvm/test/CodeGen/X86/avg-mask.ll
index b148cd3d42df6..e8866393e8b62 100644
--- a/llvm/test/CodeGen/X86/avg-mask.ll
+++ b/llvm/test/CodeGen/X86/avg-mask.ll
@@ -177,11 +177,11 @@ define <64 x i8> @avg_v64i8_maskz(<64 x i8> %a, <64 x i8> %b, i64 %mask) nounwin
; AVX512F-NEXT: shrq $32, %rdi
; AVX512F-NEXT: shrq $48, %rax
; AVX512F-NEXT: shrl $16, %ecx
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpavgb %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: kmovw %ecx, %k2
; AVX512F-NEXT: kmovw %eax, %k3
; AVX512F-NEXT: kmovw %edi, %k4
@@ -364,11 +364,11 @@ define <32 x i16> @avg_v32i16_maskz(<32 x i16> %a, <32 x i16> %b, i32 %mask) nou
; AVX512F: # %bb.0:
; AVX512F-NEXT: kmovw %edi, %k1
; AVX512F-NEXT: shrl $16, %edi
-; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm2
-; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm3
-; AVX512F-NEXT: vpavgw %ymm2, %ymm3, %ymm2
+; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm2
+; AVX512F-NEXT: vextracti64x4 $1, %zmm1, %ymm1
+; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT: vpavgw %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
+; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm2, %zmm0
; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: vpternlogd {{.*#+}} zmm1 {%k1} {z} = -1
; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index c60d9a3ff17d3..1a712ffac5b7e 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -6,7 +6,8 @@
define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: zext_8x8mem_to_8x16:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; KNL-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -21,7 +22,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
;
; AVX512DQNOBW-LABEL: zext_8x8mem_to_8x16:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
+; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -35,7 +37,8 @@ define <8 x i16> @zext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone {
; KNL-LABEL: sext_8x8mem_to_8x16:
; KNL: # %bb.0:
-; KNL-NEXT: vpmovsxbw (%rdi), %xmm1
+; KNL-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; KNL-NEXT: vpmovsxbw %xmm1, %xmm1
; KNL-NEXT: vpsllw $15, %xmm0, %xmm0
; KNL-NEXT: vpsraw $15, %xmm0, %xmm0
; KNL-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -50,7 +53,8 @@ define <8 x i16> @sext_8x8mem_to_8x16(ptr%i , <8 x i1> %mask) nounwind readnone
;
; AVX512DQNOBW-LABEL: sext_8x8mem_to_8x16:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %xmm1
+; AVX512DQNOBW-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
+; AVX512DQNOBW-NEXT: vpmovsxbw %xmm1, %xmm1
; AVX512DQNOBW-NEXT: vpsllw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpsraw $15, %xmm0, %xmm0
; AVX512DQNOBW-NEXT: vpand %xmm1, %xmm0, %xmm0
@@ -208,8 +212,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; KNL-NEXT: vmovdqu (%rdi), %ymm2
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
+; KNL-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -231,8 +237,10 @@ define <32 x i16> @zext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
-; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero
+; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
+; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
+; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero,xmm2[8],zero,xmm2[9],zero,xmm2[10],zero,xmm2[11],zero,xmm2[12],zero,xmm2[13],zero,xmm2[14],zero,xmm2[15],zero
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -253,8 +261,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; KNL-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; KNL-NEXT: vpmovsxbw 16(%rdi), %ymm2
-; KNL-NEXT: vpmovsxbw (%rdi), %ymm3
+; KNL-NEXT: vmovdqu (%rdi), %ymm2
+; KNL-NEXT: vpmovsxbw %xmm2, %ymm3
+; KNL-NEXT: vextracti128 $1, %ymm2, %xmm2
+; KNL-NEXT: vpmovsxbw %xmm2, %ymm2
; KNL-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; KNL-NEXT: vpsllw $15, %ymm0, %ymm0
; KNL-NEXT: vpsraw $15, %ymm0, %ymm0
@@ -276,8 +286,10 @@ define <32 x i16> @sext_32x8mem_to_32x16(ptr%i , <32 x i1> %mask) nounwind readn
; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
; AVX512DQNOBW-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512DQNOBW-NEXT: vpmovsxbw 16(%rdi), %ymm2
-; AVX512DQNOBW-NEXT: vpmovsxbw (%rdi), %ymm3
+; AVX512DQNOBW-NEXT: vmovdqu (%rdi), %ymm2
+; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm3
+; AVX512DQNOBW-NEXT: vextracti128 $1, %ymm2, %xmm2
+; AVX512DQNOBW-NEXT: vpmovsxbw %xmm2, %ymm2
; AVX512DQNOBW-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512DQNOBW-NEXT: vpsllw $15, %ymm0, %ymm0
; AVX512DQNOBW-NEXT: vpsraw $15, %ymm0, %ymm0
diff --git a/llvm/test/CodeGen/X86/pr78897.ll b/llvm/test/CodeGen/X86/pr78897.ll
index 4613c2bcdcaf4..db77baa7ff8a3 100644
--- a/llvm/test/CodeGen/X86/pr78897.ll
+++ b/llvm/test/CodeGen/X86/pr78897.ll
@@ -22,7 +22,7 @@ define <16 x i8> @produceShuffleVectorForByte(i8 zeroext %0) nounwind {
; X86-SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-SSE2-NEXT: pxor %xmm0, %xmm0
; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0
-; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,u,u,u,u,u,u,u,u]
+; X86-SSE2-NEXT: movq {{.*#+}} xmm1 = [17,17,17,17,17,17,17,17,0,0,0,0,0,0,0,0]
; X86-SSE2-NEXT: pand %xmm0, %xmm1
; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; X86-SSE2-NEXT: movd %xmm2, %esi
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath.ll b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
index 5cd604c62a166..a260b325f7e3c 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath.ll
@@ -410,34 +410,34 @@ define <4 x float> @v4f32_estimate(<4 x float> %x) #1 {
define <4 x float> @v4f32_estimate2(<4 x float> %x) #5 {
; SSE-LABEL: v4f32_estimate2:
; SSE: # %bb.0:
-; SSE-NEXT: rsqrtps %xmm0, %xmm2
-; SSE-NEXT: mulps %xmm0, %xmm2
-; SSE-NEXT: andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
-; SSE-NEXT: cmpleps %xmm0, %xmm1
-; SSE-NEXT: andps %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; SSE-NEXT: andps %xmm0, %xmm1
+; SSE-NEXT: movaps {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
+; SSE-NEXT: cmpleps %xmm1, %xmm2
+; SSE-NEXT: rsqrtps %xmm0, %xmm1
+; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: v4f32_estimate2:
; AVX1: # %bb.0:
-; AVX1-NEXT: vrsqrtps %xmm0, %xmm1
-; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
-; AVX1-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
+; AVX1-NEXT: vrsqrtps %xmm0, %xmm2
+; AVX1-NEXT: vmulps %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vandps %xmm0, %xmm1, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: v4f32_estimate2:
; AVX512: # %bb.0:
-; AVX512-NEXT: vrsqrtps %xmm0, %xmm1
-; AVX512-NEXT: vmulps %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [NaN,NaN,NaN,NaN]
-; AVX512-NEXT: vandps %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vbroadcastss {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm1
; AVX512-NEXT: vbroadcastss {{.*#+}} xmm2 = [1.17549435E-38,1.17549435E-38,1.17549435E-38,1.17549435E-38]
-; AVX512-NEXT: vcmpleps %xmm0, %xmm2, %xmm0
-; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vcmpleps %xmm1, %xmm2, %xmm1
+; AVX512-NEXT: vrsqrtps %xmm0, %xmm2
+; AVX512-NEXT: vmulps %xmm2, %xmm0, %xmm0
+; AVX512-NEXT: vandps %xmm0, %xmm1, %xmm0
; AVX512-NEXT: retq
%sqrt = tail call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
ret <4 x float> %sqrt
diff --git a/llvm/test/CodeGen/X86/ushl_sat_vec.ll b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
index ebb5e135eacd0..b8e83da9cf361 100644
--- a/llvm/test/CodeGen/X86/ushl_sat_vec.ll
+++ b/llvm/test/CodeGen/X86/ushl_sat_vec.ll
@@ -281,7 +281,7 @@ define <8 x i16> @vec_v8i16(<8 x i16> %x, <8 x i16> %y) nounwind {
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-AVX2-NEXT: vpsllvd %ymm1, %ymm2, %ymm2
-; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
+; X64-AVX2-NEXT: vpshufb {{.*#+}} ymm2 = ymm2[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,24,25,28,29,28,29,30,31]
; X64-AVX2-NEXT: vpermq {{.*#+}} ymm2 = ymm2[0,2,2,3]
; X64-AVX2-NEXT: vpmovzxwd {{.*#+}} ymm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero,xmm2[4],zero,xmm2[5],zero,xmm2[6],zero,xmm2[7],zero
; X64-AVX2-NEXT: vpsrlvd %ymm1, %ymm3, %ymm1
diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll
index 7f4111e65cc17..61740115c5fae 100644
--- a/llvm/test/CodeGen/X86/var-permute-128.ll
+++ b/llvm/test/CodeGen/X86/var-permute-128.ll
@@ -501,39 +501,39 @@ define <8 x i16> @var_shuffle_zero_v8i16(<8 x i16> %v, <8 x i16> %indices) nounw
; SSE3-NEXT: pextrw $0, %xmm1, %eax
; SSE3-NEXT: pextrw $1, %xmm1, %ecx
; SSE3-NEXT: pextrw $2, %xmm1, %edx
-; SSE3-NEXT: pextrw $3, %xmm1, %esi
-; SSE3-NEXT: pextrw $4, %xmm1, %edi
-; SSE3-NEXT: pextrw $5, %xmm1, %r8d
-; SSE3-NEXT: pextrw $6, %xmm1, %r9d
-; SSE3-NEXT: pextrw $7, %xmm1, %r10d
+; SSE3-NEXT: pextrw $3, %xmm1, %edi
+; SSE3-NEXT: pextrw $4, %xmm1, %r8d
+; SSE3-NEXT: pextrw $5, %xmm1, %r9d
+; SSE3-NEXT: pextrw $6, %xmm1, %r10d
+; SSE3-NEXT: pextrw $7, %xmm1, %esi
; SSE3-NEXT: movdqa %xmm2, -24(%rsp)
; SSE3-NEXT: andl $7, %eax
+; SSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSE3-NEXT: andl $7, %ecx
+; SSE3-NEXT: movzwl -24(%rsp,%rcx,2), %ecx
; SSE3-NEXT: andl $7, %edx
-; SSE3-NEXT: andl $7, %esi
+; SSE3-NEXT: movzwl -24(%rsp,%rdx,2), %edx
; SSE3-NEXT: andl $7, %edi
+; SSE3-NEXT: movzwl -24(%rsp,%rdi,2), %edi
; SSE3-NEXT: andl $7, %r8d
+; SSE3-NEXT: movzwl -24(%rsp,%r8,2), %r8d
; SSE3-NEXT: andl $7, %r9d
+; SSE3-NEXT: movzwl -24(%rsp,%r9,2), %r9d
; SSE3-NEXT: andl $7, %r10d
; SSE3-NEXT: movzwl -24(%rsp,%r10,2), %r10d
-; SSE3-NEXT: movd %r10d, %xmm1
-; SSE3-NEXT: movzwl -24(%rsp,%r9,2), %r9d
-; SSE3-NEXT: movd %r9d, %xmm2
+; SSE3-NEXT: andl $7, %esi
+; SSE3-NEXT: movzwl -24(%rsp,%rsi,2), %esi
+; SSE3-NEXT: movd %esi, %xmm1
+; SSE3-NEXT: movd %r10d, %xmm2
; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE3-NEXT: movzwl -24(%rsp,%r8,2), %r8d
-; SSE3-NEXT: movd %r8d, %xmm1
-; SSE3-NEXT: movzwl -24(%rsp,%rdi,2), %edi
-; SSE3-NEXT: movd %edi, %xmm3
+; SSE3-NEXT: movd %r9d, %xmm1
+; SSE3-NEXT: movd %r8d, %xmm3
; SSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3]
; SSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
-; SSE3-NEXT: movzwl -24(%rsp,%rsi,2), %esi
-; SSE3-NEXT: movd %esi, %xmm1
-; SSE3-NEXT: movzwl -24(%rsp,%rdx,2), %edx
+; SSE3-NEXT: movd %edi, %xmm1
; SSE3-NEXT: movd %edx, %xmm2
; SSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
-; SSE3-NEXT: movzwl -24(%rsp,%rcx,2), %ecx
; SSE3-NEXT: movd %ecx, %xmm1
-; SSE3-NEXT: movzwl -24(%rsp,%rax,2), %eax
; SSE3-NEXT: movd %eax, %xmm4
; SSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
; SSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1]
@@ -1053,8 +1053,9 @@ define <2 x double> @var_shuffle_zero_v2f64(<2 x double> %v, <2 x i64> %indices)
; SSE3-NEXT: movq %xmm1, %rcx
; SSE3-NEXT: andl $1, %ecx
; SSE3-NEXT: movaps %xmm0, -24(%rsp)
-; SSE3-NEXT: movsd -24(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
-; SSE3-NEXT: movhps -24(%rsp,%rcx,8), %xmm0 # xmm0 = xmm0[0,1],mem[0,1]
+; SSE3-NEXT: movq -24(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
+; SSE3-NEXT: movq -24(%rsp,%rcx,8), %xmm1 # xmm1 = mem[0],zero
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE3-NEXT: pandn %xmm0, %xmm2
; SSE3-NEXT: movdqa %xmm2, %xmm0
; SSE3-NEXT: retq
@@ -1077,8 +1078,9 @@ define <2 x double> @var_shuffle_zero_v2f64(<2 x double> %v, <2 x i64> %indices)
; SSSE3-NEXT: movq %xmm1, %rcx
; SSSE3-NEXT: andl $1, %ecx
; SSSE3-NEXT: movaps %xmm0, -24(%rsp)
-; SSSE3-NEXT: movsd -24(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
-; SSSE3-NEXT: movhps -24(%rsp,%rcx,8), %xmm0 # xmm0 = xmm0[0,1],mem[0,1]
+; SSSE3-NEXT: movq -24(%rsp,%rax,8), %xmm0 # xmm0 = mem[0],zero
+; SSSE3-NEXT: movq -24(%rsp,%rcx,8), %xmm1 # xmm1 = mem[0],zero
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSSE3-NEXT: pandn %xmm0, %xmm2
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
@@ -1251,16 +1253,16 @@ define <4 x float> @var_shuffle_zero_v4f32(<4 x float> %v, <4 x i32> %indices) n
; SSE3-NEXT: movd %xmm1, %esi
; SSE3-NEXT: movaps %xmm2, -24(%rsp)
; SSE3-NEXT: andl $3, %eax
+; SSE3-NEXT: movd -24(%rsp,%rax,4), %xmm1 # xmm1 = mem[0],zero,zero,zero
; SSE3-NEXT: andl $3, %ecx
+; SSE3-NEXT: movd -24(%rsp,%rcx,4), %xmm2 # xmm2 = mem[0],zero,zero,zero
; SSE3-NEXT: andl $3, %edx
+; SSE3-NEXT: movd -24(%rsp,%rdx,4), %xmm3 # xmm3 = mem[0],zero,zero,zero
; SSE3-NEXT: andl $3, %esi
-; SSE3-NEXT: movd -24(%rsp,%rsi,4), %xmm1 # xmm1 = mem[0],zero,zero,zero
-; SSE3-NEXT: movd -24(%rsp,%rdx,4), %xmm2 # xmm2 = mem[0],zero,zero,zero
-; SSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
-; SSE3-NEXT: movd -24(%rsp,%rax,4), %xmm1 # xmm1 = mem[0],zero,zero,zero
-; SSE3-NEXT: movd -24(%rsp,%rcx,4), %xmm3 # xmm3 = mem[0],zero,zero,zero
-; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
+; SSE3-NEXT: movd -24(%rsp,%rsi,4), %xmm4 # xmm4 = mem[0],zero,zero,zero
+; SSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
+; SSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; SSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE3-NEXT: pandn %xmm1, %xmm0
; SSE3-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-bo-select.ll b/llvm/test/CodeGen/X86/vector-bo-select.ll
index 11e7fe85d0239..0e37e5a2c72c6 100644
--- a/llvm/test/CodeGen/X86/vector-bo-select.ll
+++ b/llvm/test/CodeGen/X86/vector-bo-select.ll
@@ -468,29 +468,29 @@ define <16 x float> @fsub_v16f32_swap(<16 x i1> %b, <16 x float> noundef %x, <16
;
; SSE42-LABEL: fsub_v16f32_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm8
; SSE42-NEXT: psrad $31, %xmm8
-; SSE42-NEXT: pandn %xmm7, %xmm8
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm0
; SSE42-NEXT: psrad $31, %xmm0
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm0
-; SSE42-NEXT: subps %xmm6, %xmm1
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: subps %xmm0, %xmm1
; SSE42-NEXT: subps %xmm7, %xmm2
-; SSE42-NEXT: subps %xmm8, %xmm3
-; SSE42-NEXT: subps %xmm0, %xmm4
+; SSE42-NEXT: subps %xmm9, %xmm3
+; SSE42-NEXT: subps %xmm8, %xmm4
; SSE42-NEXT: movaps %xmm1, %xmm0
; SSE42-NEXT: movaps %xmm2, %xmm1
; SSE42-NEXT: movaps %xmm3, %xmm2
@@ -562,33 +562,32 @@ define <16 x float> @fsub_v16f32_commute_swap(<16 x i1> %b, <16 x float> noundef
;
; SSE42-LABEL: fsub_v16f32_commute_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: movaps %xmm2, %xmm8
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm2
-; SSE42-NEXT: psrad $31, %xmm2
-; SSE42-NEXT: pandn %xmm7, %xmm2
+; SSE42-NEXT: movaps %xmm3, %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm3
+; SSE42-NEXT: psrad $31, %xmm3
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm3
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: psrad $31, %xmm5
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
-; SSE42-NEXT: subps %xmm1, %xmm6
-; SSE42-NEXT: subps %xmm8, %xmm7
-; SSE42-NEXT: subps %xmm3, %xmm2
-; SSE42-NEXT: subps %xmm4, %xmm5
-; SSE42-NEXT: movaps %xmm6, %xmm0
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm0
+; SSE42-NEXT: psrad $31, %xmm0
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: subps %xmm1, %xmm0
+; SSE42-NEXT: subps %xmm2, %xmm7
+; SSE42-NEXT: subps %xmm8, %xmm9
+; SSE42-NEXT: subps %xmm4, %xmm3
; SSE42-NEXT: movaps %xmm7, %xmm1
-; SSE42-NEXT: movaps %xmm5, %xmm3
+; SSE42-NEXT: movaps %xmm9, %xmm2
; SSE42-NEXT: retq
;
; AVX2-LABEL: fsub_v16f32_commute_swap:
@@ -2407,29 +2406,29 @@ define <16 x i32> @sub_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i3
;
; SSE42-LABEL: sub_v16i32_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm8
; SSE42-NEXT: psrad $31, %xmm8
-; SSE42-NEXT: pandn %xmm7, %xmm8
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm0
; SSE42-NEXT: psrad $31, %xmm0
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm0
-; SSE42-NEXT: psubd %xmm6, %xmm1
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: psubd %xmm0, %xmm1
; SSE42-NEXT: psubd %xmm7, %xmm2
-; SSE42-NEXT: psubd %xmm8, %xmm3
-; SSE42-NEXT: psubd %xmm0, %xmm4
+; SSE42-NEXT: psubd %xmm9, %xmm3
+; SSE42-NEXT: psubd %xmm8, %xmm4
; SSE42-NEXT: movdqa %xmm1, %xmm0
; SSE42-NEXT: movdqa %xmm2, %xmm1
; SSE42-NEXT: movdqa %xmm3, %xmm2
@@ -2501,33 +2500,32 @@ define <16 x i32> @sub_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x,
;
; SSE42-LABEL: sub_v16i32_commute_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm2, %xmm8
-; SSE42-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm2
-; SSE42-NEXT: psrad $31, %xmm2
-; SSE42-NEXT: pandn %xmm7, %xmm2
+; SSE42-NEXT: movdqa %xmm3, %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm0[3,3,3,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm3
+; SSE42-NEXT: psrad $31, %xmm3
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm3
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: psrad $31, %xmm5
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
-; SSE42-NEXT: psubd %xmm1, %xmm6
-; SSE42-NEXT: psubd %xmm8, %xmm7
-; SSE42-NEXT: psubd %xmm3, %xmm2
-; SSE42-NEXT: psubd %xmm4, %xmm5
-; SSE42-NEXT: movdqa %xmm6, %xmm0
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm0
+; SSE42-NEXT: psrad $31, %xmm0
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: psubd %xmm1, %xmm0
+; SSE42-NEXT: psubd %xmm2, %xmm7
+; SSE42-NEXT: psubd %xmm8, %xmm9
+; SSE42-NEXT: psubd %xmm4, %xmm3
; SSE42-NEXT: movdqa %xmm7, %xmm1
-; SSE42-NEXT: movdqa %xmm5, %xmm3
+; SSE42-NEXT: movdqa %xmm9, %xmm2
; SSE42-NEXT: retq
;
; AVX2-LABEL: sub_v16i32_commute_swap:
@@ -3371,41 +3369,41 @@ define <16 x i32> @shl_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i3
;
; SSE42-LABEL: shl_v16i32_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm8
; SSE42-NEXT: psrad $31, %xmm8
-; SSE42-NEXT: pandn %xmm7, %xmm8
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: psrad $31, %xmm5
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
-; SSE42-NEXT: pslld $23, %xmm6
-; SSE42-NEXT: movdqa {{.*#+}} xmm9 = [1065353216,1065353216,1065353216,1065353216]
-; SSE42-NEXT: paddd %xmm9, %xmm6
-; SSE42-NEXT: cvttps2dq %xmm6, %xmm0
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm0
+; SSE42-NEXT: psrad $31, %xmm0
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: pslld $23, %xmm0
+; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
+; SSE42-NEXT: paddd %xmm5, %xmm0
+; SSE42-NEXT: cvttps2dq %xmm0, %xmm0
; SSE42-NEXT: pmulld %xmm1, %xmm0
; SSE42-NEXT: pslld $23, %xmm7
-; SSE42-NEXT: paddd %xmm9, %xmm7
+; SSE42-NEXT: paddd %xmm5, %xmm7
; SSE42-NEXT: cvttps2dq %xmm7, %xmm1
; SSE42-NEXT: pmulld %xmm2, %xmm1
-; SSE42-NEXT: pslld $23, %xmm8
-; SSE42-NEXT: paddd %xmm9, %xmm8
-; SSE42-NEXT: cvttps2dq %xmm8, %xmm2
+; SSE42-NEXT: pslld $23, %xmm9
+; SSE42-NEXT: paddd %xmm5, %xmm9
+; SSE42-NEXT: cvttps2dq %xmm9, %xmm2
; SSE42-NEXT: pmulld %xmm3, %xmm2
-; SSE42-NEXT: pslld $23, %xmm5
-; SSE42-NEXT: paddd %xmm9, %xmm5
-; SSE42-NEXT: cvttps2dq %xmm5, %xmm3
+; SSE42-NEXT: pslld $23, %xmm8
+; SSE42-NEXT: paddd %xmm5, %xmm8
+; SSE42-NEXT: cvttps2dq %xmm8, %xmm3
; SSE42-NEXT: pmulld %xmm4, %xmm3
; SSE42-NEXT: retq
;
@@ -3508,11 +3506,16 @@ define <16 x i32> @shl_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x,
;
; SSE42-LABEL: shl_v16i32_commute_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm8
; SSE42-NEXT: psrad $31, %xmm8
-; SSE42-NEXT: pandn %xmm7, %xmm8
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
@@ -3522,28 +3525,23 @@ define <16 x i32> @shl_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x,
; SSE42-NEXT: pslld $31, %xmm6
; SSE42-NEXT: psrad $31, %xmm6
; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: psrad $31, %xmm5
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
; SSE42-NEXT: pslld $23, %xmm1
-; SSE42-NEXT: movdqa {{.*#+}} xmm9 = [1065353216,1065353216,1065353216,1065353216]
-; SSE42-NEXT: paddd %xmm9, %xmm1
+; SSE42-NEXT: movdqa {{.*#+}} xmm5 = [1065353216,1065353216,1065353216,1065353216]
+; SSE42-NEXT: paddd %xmm5, %xmm1
; SSE42-NEXT: cvttps2dq %xmm1, %xmm0
; SSE42-NEXT: pmulld %xmm6, %xmm0
; SSE42-NEXT: pslld $23, %xmm2
-; SSE42-NEXT: paddd %xmm9, %xmm2
+; SSE42-NEXT: paddd %xmm5, %xmm2
; SSE42-NEXT: cvttps2dq %xmm2, %xmm1
; SSE42-NEXT: pmulld %xmm7, %xmm1
; SSE42-NEXT: pslld $23, %xmm3
-; SSE42-NEXT: paddd %xmm9, %xmm3
+; SSE42-NEXT: paddd %xmm5, %xmm3
; SSE42-NEXT: cvttps2dq %xmm3, %xmm2
-; SSE42-NEXT: pmulld %xmm8, %xmm2
+; SSE42-NEXT: pmulld %xmm9, %xmm2
; SSE42-NEXT: pslld $23, %xmm4
-; SSE42-NEXT: paddd %xmm9, %xmm4
+; SSE42-NEXT: paddd %xmm5, %xmm4
; SSE42-NEXT: cvttps2dq %xmm4, %xmm3
-; SSE42-NEXT: pmulld %xmm5, %xmm3
+; SSE42-NEXT: pmulld %xmm8, %xmm3
; SSE42-NEXT: retq
;
; AVX2-LABEL: shl_v16i32_commute_swap:
@@ -4078,85 +4076,85 @@ define <16 x i32> @lshr_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i
;
; SSE42-LABEL: lshr_v16i32_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm8
; SSE42-NEXT: psrad $31, %xmm8
-; SSE42-NEXT: pandn %xmm7, %xmm8
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: psrad $31, %xmm5
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
-; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm1, %xmm9
-; SSE42-NEXT: psrld %xmm0, %xmm9
-; SSE42-NEXT: pshufd {{.*#+}} xmm10 = xmm6[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm10[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm0
+; SSE42-NEXT: psrad $31, %xmm0
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm0[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm1, %xmm6
+; SSE42-NEXT: psrld %xmm5, %xmm6
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm10 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm11
-; SSE42-NEXT: psrld %xmm0, %xmm11
-; SSE42-NEXT: pblendw {{.*#+}} xmm11 = xmm9[0,1,2,3],xmm11[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrld %xmm10, %xmm11
+; SSE42-NEXT: pblendw {{.*#+}} xmm11 = xmm6[0,1,2,3],xmm11[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm0[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm0
; SSE42-NEXT: psrld %xmm6, %xmm0
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm10[0,1,1,1,4,5,6,7]
-; SSE42-NEXT: psrld %xmm6, %xmm1
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrld %xmm5, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm11[2,3],xmm0[4,5],xmm11[6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm7[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm2, %xmm6
-; SSE42-NEXT: psrld %xmm1, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm7[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm9[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm2, %xmm5
+; SSE42-NEXT: psrld %xmm1, %xmm5
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm7[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm6[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm10
; SSE42-NEXT: psrld %xmm1, %xmm10
-; SSE42-NEXT: pblendw {{.*#+}} xmm10 = xmm6[0,1,2,3],xmm10[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm7[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm10 = xmm5[0,1,2,3],xmm10[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm7[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm1
-; SSE42-NEXT: psrld %xmm6, %xmm1
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm9[0,1,1,1,4,5,6,7]
-; SSE42-NEXT: psrld %xmm6, %xmm2
+; SSE42-NEXT: psrld %xmm5, %xmm1
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrld %xmm5, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm10[2,3],xmm1[4,5],xmm10[6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm8[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm3, %xmm6
-; SSE42-NEXT: psrld %xmm2, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm8[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm7[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm3, %xmm9
-; SSE42-NEXT: psrld %xmm2, %xmm9
-; SSE42-NEXT: pblendw {{.*#+}} xmm9 = xmm6[0,1,2,3],xmm9[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm8[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm3, %xmm5
+; SSE42-NEXT: psrld %xmm2, %xmm5
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm9[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm6[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm3, %xmm7
+; SSE42-NEXT: psrld %xmm2, %xmm7
+; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm5[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm9[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm3, %xmm2
-; SSE42-NEXT: psrld %xmm6, %xmm2
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm7[0,1,1,1,4,5,6,7]
-; SSE42-NEXT: psrld %xmm6, %xmm3
+; SSE42-NEXT: psrld %xmm5, %xmm2
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrld %xmm5, %xmm3
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm9[2,3],xmm2[4,5],xmm9[6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm5[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm4, %xmm6
-; SSE42-NEXT: psrld %xmm3, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm7[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm4, %xmm8
-; SSE42-NEXT: psrld %xmm3, %xmm8
-; SSE42-NEXT: pblendw {{.*#+}} xmm8 = xmm6[0,1,2,3],xmm8[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm7[2,3],xmm2[4,5],xmm7[6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm8[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm4, %xmm5
+; SSE42-NEXT: psrld %xmm3, %xmm5
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm8[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm6[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm4, %xmm7
+; SSE42-NEXT: psrld %xmm3, %xmm7
+; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm5[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm4, %xmm3
; SSE42-NEXT: psrld %xmm5, %xmm3
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm7[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrld %xmm5, %xmm4
; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm8[2,3],xmm3[4,5],xmm8[6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm7[2,3],xmm3[4,5],xmm7[6,7]
; SSE42-NEXT: retq
;
; AVX2-LABEL: lshr_v16i32_swap:
@@ -4280,74 +4278,73 @@ define <16 x i32> @lshr_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x,
;
; SSE42-LABEL: lshr_v16i32_commute_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm3, %xmm10
-; SSE42-NEXT: movdqa %xmm2, %xmm9
-; SSE42-NEXT: movdqa %xmm1, %xmm8
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: movdqa %xmm3, %xmm8
+; SSE42-NEXT: movdqa %xmm2, %xmm10
+; SSE42-NEXT: movdqa %xmm1, %xmm9
+; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm3
+; SSE42-NEXT: psrad $31, %xmm3
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm3
+; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm2
; SSE42-NEXT: psrad $31, %xmm2
; SSE42-NEXT: pandn %xmm7, %xmm2
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm1
; SSE42-NEXT: psrad $31, %xmm1
; SSE42-NEXT: pandn %xmm6, %xmm1
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm0
; SSE42-NEXT: psrad $31, %xmm0
; SSE42-NEXT: pandn %xmm5, %xmm0
-; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm3
-; SSE42-NEXT: psrad $31, %xmm3
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm3
-; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm8[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm9[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, %xmm7
; SSE42-NEXT: psrld %xmm6, %xmm7
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm8[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm9[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, %xmm11
; SSE42-NEXT: psrld %xmm6, %xmm11
; SSE42-NEXT: pblendw {{.*#+}} xmm11 = xmm11[0,1,2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, %xmm6
; SSE42-NEXT: psrld %xmm5, %xmm6
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm9[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrld %xmm5, %xmm0
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm11[2,3],xmm0[4,5],xmm11[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm9[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm10[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm7
; SSE42-NEXT: psrld %xmm6, %xmm7
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm9[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm1, %xmm8
-; SSE42-NEXT: psrld %xmm6, %xmm8
-; SSE42-NEXT: pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm10[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm1, %xmm9
+; SSE42-NEXT: psrld %xmm6, %xmm9
+; SSE42-NEXT: pblendw {{.*#+}} xmm9 = xmm9[0,1,2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm6
; SSE42-NEXT: psrld %xmm5, %xmm6
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm9[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm10[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrld %xmm5, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm6[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm8[2,3],xmm1[4,5],xmm8[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm10[2,3,2,3]
+; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm9[2,3],xmm1[4,5],xmm9[6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm8[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm7
; SSE42-NEXT: psrld %xmm6, %xmm7
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm10[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm2, %xmm8
-; SSE42-NEXT: psrld %xmm6, %xmm8
-; SSE42-NEXT: pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm8[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm2, %xmm9
+; SSE42-NEXT: psrld %xmm6, %xmm9
+; SSE42-NEXT: pblendw {{.*#+}} xmm9 = xmm9[0,1,2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm6
; SSE42-NEXT: psrld %xmm5, %xmm6
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm10[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrld %xmm5, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm8[2,3],xmm2[4,5],xmm8[6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm9[2,3],xmm2[4,5],xmm9[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm3, %xmm7
@@ -4929,85 +4926,85 @@ define <16 x i32> @ashr_v16i32_swap(<16 x i1> %b, <16 x i32> noundef %x, <16 x i
;
; SSE42-LABEL: ashr_v16i32_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm8
; SSE42-NEXT: psrad $31, %xmm8
-; SSE42-NEXT: pandn %xmm7, %xmm8
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm8
+; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm9[0],zero,zero,zero,xmm9[1],zero,zero,zero,xmm9[2],zero,zero,zero,xmm9[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm9
+; SSE42-NEXT: psrad $31, %xmm9
+; SSE42-NEXT: pandn %xmm7, %xmm9
; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm0[1,1,1,1]
; SSE42-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm7
; SSE42-NEXT: psrad $31, %xmm7
; SSE42-NEXT: pandn %xmm6, %xmm7
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm6
-; SSE42-NEXT: psrad $31, %xmm6
-; SSE42-NEXT: pandn %xmm5, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm5
-; SSE42-NEXT: psrad $31, %xmm5
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm5
-; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm6[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm1, %xmm9
-; SSE42-NEXT: psrad %xmm0, %xmm9
-; SSE42-NEXT: pshufd {{.*#+}} xmm10 = xmm6[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm0 = xmm10[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm0
+; SSE42-NEXT: psrad $31, %xmm0
+; SSE42-NEXT: pandn %xmm5, %xmm0
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm0[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm1, %xmm6
+; SSE42-NEXT: psrad %xmm5, %xmm6
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm10 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm11
-; SSE42-NEXT: psrad %xmm0, %xmm11
-; SSE42-NEXT: pblendw {{.*#+}} xmm11 = xmm9[0,1,2,3],xmm11[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm6[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrad %xmm10, %xmm11
+; SSE42-NEXT: pblendw {{.*#+}} xmm11 = xmm6[0,1,2,3],xmm11[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm0[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm0
; SSE42-NEXT: psrad %xmm6, %xmm0
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm10[0,1,1,1,4,5,6,7]
-; SSE42-NEXT: psrad %xmm6, %xmm1
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrad %xmm5, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm11[2,3],xmm0[4,5],xmm11[6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm7[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm2, %xmm6
-; SSE42-NEXT: psrad %xmm1, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm9 = xmm7[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm9[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm2, %xmm5
+; SSE42-NEXT: psrad %xmm1, %xmm5
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm7[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm1 = xmm6[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm10
; SSE42-NEXT: psrad %xmm1, %xmm10
-; SSE42-NEXT: pblendw {{.*#+}} xmm10 = xmm6[0,1,2,3],xmm10[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm7[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm10 = xmm5[0,1,2,3],xmm10[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm7[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm1
-; SSE42-NEXT: psrad %xmm6, %xmm1
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm9[0,1,1,1,4,5,6,7]
-; SSE42-NEXT: psrad %xmm6, %xmm2
+; SSE42-NEXT: psrad %xmm5, %xmm1
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrad %xmm5, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm2[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm10[2,3],xmm1[4,5],xmm10[6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm8[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm3, %xmm6
-; SSE42-NEXT: psrad %xmm2, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm8[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm7[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm3, %xmm9
-; SSE42-NEXT: psrad %xmm2, %xmm9
-; SSE42-NEXT: pblendw {{.*#+}} xmm9 = xmm6[0,1,2,3],xmm9[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm8[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm9[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm3, %xmm5
+; SSE42-NEXT: psrad %xmm2, %xmm5
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm9[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm2 = xmm6[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm3, %xmm7
+; SSE42-NEXT: psrad %xmm2, %xmm7
+; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm5[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm9[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm3, %xmm2
-; SSE42-NEXT: psrad %xmm6, %xmm2
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm7[0,1,1,1,4,5,6,7]
-; SSE42-NEXT: psrad %xmm6, %xmm3
+; SSE42-NEXT: psrad %xmm5, %xmm2
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: psrad %xmm5, %xmm3
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm3[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm9[2,3],xmm2[4,5],xmm9[6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm5[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm4, %xmm6
-; SSE42-NEXT: psrad %xmm3, %xmm6
-; SSE42-NEXT: pshufd {{.*#+}} xmm7 = xmm5[2,3,2,3]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm7[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm4, %xmm8
-; SSE42-NEXT: psrad %xmm3, %xmm8
-; SSE42-NEXT: pblendw {{.*#+}} xmm8 = xmm6[0,1,2,3],xmm8[4,5,6,7]
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm7[2,3],xmm2[4,5],xmm7[6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm8[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm4, %xmm5
+; SSE42-NEXT: psrad %xmm3, %xmm5
+; SSE42-NEXT: pshufd {{.*#+}} xmm6 = xmm8[2,3,2,3]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm3 = xmm6[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm4, %xmm7
+; SSE42-NEXT: psrad %xmm3, %xmm7
+; SSE42-NEXT: pblendw {{.*#+}} xmm7 = xmm5[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm4, %xmm3
; SSE42-NEXT: psrad %xmm5, %xmm3
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm7[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm6[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrad %xmm5, %xmm4
; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1,2,3],xmm4[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm8[2,3],xmm3[4,5],xmm8[6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm7[2,3],xmm3[4,5],xmm7[6,7]
; SSE42-NEXT: retq
;
; AVX2-LABEL: ashr_v16i32_swap:
@@ -5131,74 +5128,73 @@ define <16 x i32> @ashr_v16i32_commute_swap(<16 x i1> %b, <16 x i32> noundef %x,
;
; SSE42-LABEL: ashr_v16i32_commute_swap:
; SSE42: # %bb.0:
-; SSE42-NEXT: movdqa %xmm3, %xmm10
-; SSE42-NEXT: movdqa %xmm2, %xmm9
-; SSE42-NEXT: movdqa %xmm1, %xmm8
-; SSE42-NEXT: movdqa %xmm0, %xmm3
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: movdqa %xmm3, %xmm8
+; SSE42-NEXT: movdqa %xmm2, %xmm10
+; SSE42-NEXT: movdqa %xmm1, %xmm9
+; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[3,3,3,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
+; SSE42-NEXT: pslld $31, %xmm3
+; SSE42-NEXT: psrad $31, %xmm3
+; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm3
+; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm2
; SSE42-NEXT: psrad $31, %xmm2
; SSE42-NEXT: pandn %xmm7, %xmm2
-; SSE42-NEXT: pshufd {{.*#+}} xmm0 = xmm3[1,1,1,1]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE42-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm1
; SSE42-NEXT: psrad $31, %xmm1
; SSE42-NEXT: pandn %xmm6, %xmm1
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
+; SSE42-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; SSE42-NEXT: pslld $31, %xmm0
; SSE42-NEXT: psrad $31, %xmm0
; SSE42-NEXT: pandn %xmm5, %xmm0
-; SSE42-NEXT: pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
-; SSE42-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero
-; SSE42-NEXT: pslld $31, %xmm3
-; SSE42-NEXT: psrad $31, %xmm3
-; SSE42-NEXT: pandn {{[0-9]+}}(%rsp), %xmm3
-; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm8[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm9[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, %xmm7
; SSE42-NEXT: psrad %xmm6, %xmm7
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm8[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm9[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, %xmm11
; SSE42-NEXT: psrad %xmm6, %xmm11
; SSE42-NEXT: pblendw {{.*#+}} xmm11 = xmm11[0,1,2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm0, %xmm6
; SSE42-NEXT: psrad %xmm5, %xmm6
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm9[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrad %xmm5, %xmm0
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm6[4,5,6,7]
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm11[2,3],xmm0[4,5],xmm11[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm9[2,3,2,3]
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm10[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm7
; SSE42-NEXT: psrad %xmm6, %xmm7
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm9[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm1, %xmm8
-; SSE42-NEXT: psrad %xmm6, %xmm8
-; SSE42-NEXT: pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm10[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm1, %xmm9
+; SSE42-NEXT: psrad %xmm6, %xmm9
+; SSE42-NEXT: pblendw {{.*#+}} xmm9 = xmm9[0,1,2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm1, %xmm6
; SSE42-NEXT: psrad %xmm5, %xmm6
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm9[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm10[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrad %xmm5, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3],xmm6[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm8[2,3],xmm1[4,5],xmm8[6,7]
-; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm10[2,3,2,3]
+; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm9[2,3],xmm1[4,5],xmm9[6,7]
+; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm8[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm7
; SSE42-NEXT: psrad %xmm6, %xmm7
-; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm10[2,3,3,3,4,5,6,7]
-; SSE42-NEXT: movdqa %xmm2, %xmm8
-; SSE42-NEXT: psrad %xmm6, %xmm8
-; SSE42-NEXT: pblendw {{.*#+}} xmm8 = xmm8[0,1,2,3],xmm7[4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm8[2,3,3,3,4,5,6,7]
+; SSE42-NEXT: movdqa %xmm2, %xmm9
+; SSE42-NEXT: psrad %xmm6, %xmm9
+; SSE42-NEXT: pblendw {{.*#+}} xmm9 = xmm9[0,1,2,3],xmm7[4,5,6,7]
; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm5[0,1,1,1,4,5,6,7]
; SSE42-NEXT: movdqa %xmm2, %xmm6
; SSE42-NEXT: psrad %xmm5, %xmm6
-; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm10[0,1,1,1,4,5,6,7]
+; SSE42-NEXT: pshuflw {{.*#+}} xmm5 = xmm8[0,1,1,1,4,5,6,7]
; SSE42-NEXT: psrad %xmm5, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1,2,3],xmm6[4,5,6,7]
-; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm8[2,3],xmm2[4,5],xmm8[6,7]
+; SSE42-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm9[2,3],xmm2[4,5],xmm9[6,7]
; SSE42-NEXT: pshufd {{.*#+}} xmm5 = xmm4[2,3,2,3]
; SSE42-NEXT: pshuflw {{.*#+}} xmm6 = xmm5[2,3,3,3,4,5,6,7]
; SSE42-NEXT: movdqa %xmm3, %xmm7
More information about the llvm-commits
mailing list