[llvm] [WebAssembly] Handle wide mask reductions in performSETCCCombine (PR #189358)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 30 04:07:14 PDT 2026


https://github.com/ParkHanbum created https://github.com/llvm/llvm-project/pull/189358

This extends `performSETCCCombine` to handle wide fixed-length mask
reductions such as `v32i1` and `v64i1`.

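Masks with 2, 4, 8, or 16 elements were already combined into
`wasm_anytrue` / `wasm_alltrue`, but `v32i1`/`v64i1` cases still fell
back to scalar bitmask materialization. This change adds a wide-mask
path that splits the mask into `v16i1` chunks, reduces each chunk with
`wasm_anytrue` / `wasm_alltrue`, and merges the per-chunk results with
`or` (any-true) or `and` (all-true), while preserving the existing
small-mask combines.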

For example, a reduction like

  %1 = icmp eq <32 x i16> %v, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  %3 = icmp ne i32 %2, 0

now avoids scalar bitmask reconstruction.

Fixed: https://github.com/llvm/llvm-project/issues/187294

>From 17a928d8893f8a31eee21ef1a306c66191baaead Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Wed, 25 Mar 2026 16:39:40 +0900
Subject: [PATCH 1/4] refactoring

---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 97 +++++++++++++------
 1 file changed, 67 insertions(+), 30 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 874ea2be79a33..96cfd3540ea73 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3431,6 +3431,68 @@ static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
   return DAG.getZExtOrTrunc(Ret, DL, N->getValueType(0));
 }
 
+enum class MaskReduceKind {
+  AnyTrue,
+  AllTrue,
+};
+
+struct MaskReduceInfo {
+  MaskReduceKind Kind;
+  bool Invert;
+};
+
+static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
+  auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+  if (!C)
+    return std::nullopt;
+
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+  // setcc (bitcast mask), 0, ne  -> any_true(mask)
+  if (C->isZero() && CC == ISD::SETNE)
+    return MaskReduceInfo{MaskReduceKind::AnyTrue, false};
+
+  // setcc (bitcast mask), 0, eq  -> !any_true(mask)
+  if (C->isZero() && CC == ISD::SETEQ)
+    return MaskReduceInfo{MaskReduceKind::AnyTrue, true};
+
+  // setcc (bitcast mask), -1, eq -> all_true(mask)
+  if (C->isAllOnes() && CC == ISD::SETEQ)
+    return MaskReduceInfo{MaskReduceKind::AllTrue, false};
+
+  // setcc (bitcast mask), -1, ne -> !all_true(mask)
+  if (C->isAllOnes() && CC == ISD::SETNE)
+    return MaskReduceInfo{MaskReduceKind::AllTrue, true};
+
+  return std::nullopt;
+}
+
+static SDValue combineMaskReduction(SDNode *N, EVT FromVT,
+                                         unsigned NumElts,
+                                         const MaskReduceInfo &Info,
+                                         SelectionDAG &DAG) {
+  EVT VecVT = FromVT.changeVectorElementType(
+      *DAG.getContext(), MVT::getIntegerVT(128 / NumElts));
+
+  switch (Info.Kind) {
+  case MaskReduceKind::AnyTrue:
+    if (!Info.Invert)
+      return TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
+          N, VecVT, DAG);
+    return TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
+        N, VecVT, DAG);
+
+  case MaskReduceKind::AllTrue:
+    if (!Info.Invert)
+      return TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
+          N, VecVT, DAG);
+    return TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
+        N, VecVT, DAG);
+  }
+
+  llvm_unreachable("unexpected mask reduction kind");
+}
+
 /// Try to convert a i128 comparison to a v16i8 comparison before type
 /// legalization splits it up into chunks
 static SDValue
@@ -3500,39 +3562,14 @@ static SDValue performSETCCCombine(SDNode *N,
     return SDValue();
 
   unsigned NumElts = FromVT.getVectorNumElements();
-  if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
-    return SDValue();
-
-  if (!cast<ConstantSDNode>(N->getOperand(1)))
+  auto Info = classifyMaskReduction(N);
+  if (!Info)
     return SDValue();
 
   auto &DAG = DCI.DAG;
-  EVT VecVT = FromVT.changeVectorElementType(*DAG.getContext(),
-                                             MVT::getIntegerVT(128 / NumElts));
-  // setcc (iN (bitcast (vNi1 X))), 0, ne
-  //   ==> any_true (vNi1 X)
-  if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
-          N, VecVT, DAG)) {
-    return Match;
-  }
-  // setcc (iN (bitcast (vNi1 X))), 0, eq
-  //   ==> xor (any_true (vNi1 X)), -1
-  if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
-          N, VecVT, DAG)) {
-    return Match;
-  }
-  // setcc (iN (bitcast (vNi1 X))), -1, eq
-  //   ==> all_true (vNi1 X)
-  if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
-          N, VecVT, DAG)) {
-    return Match;
-  }
-  // setcc (iN (bitcast (vNi1 X))), -1, ne
-  //   ==> xor (all_true (vNi1 X)), -1
-  if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
-          N, VecVT, DAG)) {
-    return Match;
-  }
+  if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16)
+    return combineMaskReduction(N, FromVT, NumElts, *Info, DAG);
+
   return SDValue();
 }
 

>From 605348a5cb9ac429f13c1d9a1b8cf79e4f379d46 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Mon, 30 Mar 2026 14:43:18 +0900
Subject: [PATCH 2/4] renaming for split

---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 54 +++++++++----------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 96cfd3540ea73..9f3fc0f658ba2 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3441,6 +3441,32 @@ struct MaskReduceInfo {
   bool Invert;
 };
 
+static SDValue combineSmallMaskReduction(SDNode *N, EVT FromVT,
+                                         unsigned NumElts,
+                                         const MaskReduceInfo &Info,
+                                         SelectionDAG &DAG) {
+  EVT VecVT = FromVT.changeVectorElementType(
+      *DAG.getContext(), MVT::getIntegerVT(128 / NumElts));
+
+  switch (Info.Kind) {
+  case MaskReduceKind::AnyTrue:
+    if (!Info.Invert)
+      return TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
+          N, VecVT, DAG);
+    return TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
+        N, VecVT, DAG);
+
+  case MaskReduceKind::AllTrue:
+    if (!Info.Invert)
+      return TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
+          N, VecVT, DAG);
+    return TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
+        N, VecVT, DAG);
+  }
+
+  llvm_unreachable("unexpected mask reduction kind");
+}
+
 static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
   auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!C)
@@ -3467,32 +3493,6 @@ static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
   return std::nullopt;
 }
 
-static SDValue combineMaskReduction(SDNode *N, EVT FromVT,
-                                         unsigned NumElts,
-                                         const MaskReduceInfo &Info,
-                                         SelectionDAG &DAG) {
-  EVT VecVT = FromVT.changeVectorElementType(
-      *DAG.getContext(), MVT::getIntegerVT(128 / NumElts));
-
-  switch (Info.Kind) {
-  case MaskReduceKind::AnyTrue:
-    if (!Info.Invert)
-      return TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
-          N, VecVT, DAG);
-    return TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
-        N, VecVT, DAG);
-
-  case MaskReduceKind::AllTrue:
-    if (!Info.Invert)
-      return TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
-          N, VecVT, DAG);
-    return TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
-        N, VecVT, DAG);
-  }
-
-  llvm_unreachable("unexpected mask reduction kind");
-}
-
 /// Try to convert a i128 comparison to a v16i8 comparison before type
 /// legalization splits it up into chunks
 static SDValue
@@ -3568,7 +3568,7 @@ static SDValue performSETCCCombine(SDNode *N,
 
   auto &DAG = DCI.DAG;
   if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16)
-    return combineMaskReduction(N, FromVT, NumElts, *Info, DAG);
+    return combineSmallMaskReduction(N, FromVT, NumElts, *Info, DAG);
 
   return SDValue();
 }

>From 0d54646c0774a4913fc92d9fcef8222920bb392e Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Mon, 30 Mar 2026 14:43:48 +0900
Subject: [PATCH 3/4] [WebAssembly] Handle wide mask reductions in
 performSETCCCombine

This extends `performSETCCCombine` to handle wide fixed-length mask
reductions such as `v32i1` and `v64i1`.

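Masks with 2, 4, 8, or 16 elements were already combined into
`wasm_anytrue` / `wasm_alltrue`, but `v32i1`/`v64i1` cases still fell
back to scalar bitmask materialization. This change adds a wide-mask
path that splits the mask into `v16i1` chunks, reduces each chunk with
`wasm_anytrue` / `wasm_alltrue`, and merges the per-chunk results with
`or` (any-true) or `and` (all-true), while preserving the existing
small-mask combines.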

For example, a reduction like

  %1 = icmp eq <32 x i16> %v, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  %3 = icmp ne i32 %2, 0

now avoids scalar bitmask reconstruction.

Fixed: #187294
---
 .../WebAssembly/WebAssemblyISelLowering.cpp   | 62 +++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 9f3fc0f658ba2..2a09ea2c176ec 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -3467,6 +3467,64 @@ static SDValue combineSmallMaskReduction(SDNode *N, EVT FromVT,
   llvm_unreachable("unexpected mask reduction kind");
 }
 
+static SDValue combineWideMaskReduction(SDNode *N, SDValue Mask, EVT MaskVT,
+                                        unsigned NumElts,
+                                        const MaskReduceInfo &Info,
+                                        SelectionDAG &DAG) {
+  assert((NumElts == 32 || NumElts == 64) &&
+         "combineWideMaskReduction is only for wide masks");
+  assert(MaskVT.isFixedLengthVector() &&
+         MaskVT.getVectorElementType() == MVT::i1);
+  SDLoc DL(N);
+  unsigned ChunkElts = 16;
+  EVT ChunkMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
+                                     ElementCount::getFixed(ChunkElts));
+  EVT LegalVecVT = ChunkMaskVT.changeVectorElementType(
+      *DAG.getContext(), MVT::getIntegerVT(128 / ChunkElts));
+  Intrinsic::ID IID = Info.Kind == MaskReduceKind::AnyTrue
+                          ? Intrinsic::wasm_anytrue
+                          : Intrinsic::wasm_alltrue;
+
+  SmallVector<SDValue, 4> ChunkResults;
+  // Split the wide mask into v16i1 chunks and reduce each chunk separately.
+  // For example:
+  //   v32i1:  [0..15] [16..31]
+  //              |       |
+  //              v       v
+  //            chunk0  chunk1
+  //
+  //   v64i1:  [0..15] [16..31] [32..47] [48..63]
+  //              |       |       |       |
+  //              v       v       v       v
+  //            chunk0  chunk1  chunk2  chunk3
+  //
+  //   each chunk:
+  //     v16i1 -> v16i8 -> wasm_anytrue/alltrue -> i32 0/1
+  for (unsigned I = 0; I < NumElts; I += ChunkElts) {
+    SDValue ChunkMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ChunkMaskVT,
+                                    Mask, DAG.getVectorIdxConstant(I, DL));
+    SDValue LegalMask = DAG.getSExtOrTrunc(ChunkMask, DL, LegalVecVT);
+    SDValue Reduced =
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::i32,
+                    DAG.getConstant(IID, DL, MVT::i32), LegalMask);
+    ChunkResults.push_back(Reduced);
+  }
+
+  SDValue Acc = ChunkResults[0];
+  for (unsigned I = 1; I < ChunkResults.size(); ++I) {
+    unsigned Opc = Info.Kind == MaskReduceKind::AnyTrue ? ISD::OR : ISD::AND;
+    Acc = DAG.getNode(Opc, DL, MVT::i32, Acc, ChunkResults[I]);
+  }
+
+  if (Info.Invert)
+    Acc = DAG.getNode(ISD::XOR, DL, MVT::i32, Acc,
+                      DAG.getConstant(1, DL, MVT::i32));
+
+  if (N->getValueType(0) != MVT::i32)
+    return DAG.getZExtOrTrunc(Acc, DL, N->getValueType(0));
+  return Acc;
+}
+
 static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
   auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
   if (!C)
@@ -3570,6 +3628,10 @@ static SDValue performSETCCCombine(SDNode *N,
   if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16)
     return combineSmallMaskReduction(N, FromVT, NumElts, *Info, DAG);
 
+  if (NumElts == 32 || NumElts == 64)
+    return combineWideMaskReduction(N, LHS.getOperand(0), FromVT, NumElts,
+                                    *Info, DAG);
+
   return SDValue();
 }
 

>From c2dab149b53dc8b5396d119611e34dceba802304 Mon Sep 17 00:00:00 2001
From: Hanbum Park <kese111 at gmail.com>
Date: Mon, 30 Mar 2026 14:00:31 +0900
Subject: [PATCH 4/4] add&update testcases

---
 .../WebAssembly/simd-setcc-reductions.ll      | 1099 +++++++++++++++++
 .../WebAssembly/simd-vecreduce-bool.ll        |  804 +-----------
 2 files changed, 1147 insertions(+), 756 deletions(-)

diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index e562c4ab70048..b645fc57478f3 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -144,5 +144,1104 @@ entry:
   ret i64 %0
 }
 
+define i32 @all_true_big_v32i16(<32 x i16> %v) {
+; CHECK-LABEL: all_true_big_v32i16:
+; CHECK:         .functype all_true_big_v32i16 (v128, v128, v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push22=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push21=, $4=, $pop22
+; CHECK-NEXT:    i16x8.eq $push8=, $0, $pop21
+; CHECK-NEXT:    i16x8.eq $push7=, $1, $4
+; CHECK-NEXT:    i8x16.shuffle $push9=, $pop8, $pop7, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push3=, 7
+; CHECK-NEXT:    i8x16.shl $push10=, $pop9, $pop3
+; CHECK-NEXT:    i32.const $push20=, 7
+; CHECK-NEXT:    i8x16.shr_s $push11=, $pop10, $pop20
+; CHECK-NEXT:    i8x16.all_true $push12=, $pop11
+; CHECK-NEXT:    i16x8.eq $push1=, $2, $4
+; CHECK-NEXT:    i16x8.eq $push0=, $3, $4
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push19=, 7
+; CHECK-NEXT:    i8x16.shl $push4=, $pop2, $pop19
+; CHECK-NEXT:    i32.const $push18=, 7
+; CHECK-NEXT:    i8x16.shr_s $push5=, $pop4, $pop18
+; CHECK-NEXT:    i8x16.all_true $push6=, $pop5
+; CHECK-NEXT:    i32.and $push13=, $pop12, $pop6
+; CHECK-NEXT:    i32.const $push14=, -1
+; CHECK-NEXT:    i32.xor $push15=, $pop13, $pop14
+; CHECK-NEXT:    i32.const $push16=, 1
+; CHECK-NEXT:    i32.and $push17=, $pop15, $pop16
+; CHECK-NEXT:    return $pop17
+  %1 = icmp eq <32 x i16> %v, zeroinitializer
+  %2 = bitcast <32 x i1> %1 to i32
+  %3 = icmp ne i32 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+define i64 @all_true_big_v64i16(<64 x i16> %v) {
+; CHECK-LABEL: all_true_big_v64i16:
+; CHECK:         .functype all_true_big_v64i16 (v128, v128, v128, v128, v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push41=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push40=, $8=, $pop41
+; CHECK-NEXT:    i16x8.eq $push8=, $0, $pop40
+; CHECK-NEXT:    i16x8.eq $push7=, $1, $8
+; CHECK-NEXT:    i8x16.shuffle $push9=, $pop8, $pop7, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push3=, 7
+; CHECK-NEXT:    i8x16.shl $push10=, $pop9, $pop3
+; CHECK-NEXT:    i32.const $push39=, 7
+; CHECK-NEXT:    i8x16.shr_s $push11=, $pop10, $pop39
+; CHECK-NEXT:    i8x16.all_true $push12=, $pop11
+; CHECK-NEXT:    i16x8.eq $push1=, $2, $8
+; CHECK-NEXT:    i16x8.eq $push0=, $3, $8
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push38=, 7
+; CHECK-NEXT:    i8x16.shl $push4=, $pop2, $pop38
+; CHECK-NEXT:    i32.const $push37=, 7
+; CHECK-NEXT:    i8x16.shr_s $push5=, $pop4, $pop37
+; CHECK-NEXT:    i8x16.all_true $push6=, $pop5
+; CHECK-NEXT:    i32.and $push13=, $pop12, $pop6
+; CHECK-NEXT:    i16x8.eq $push15=, $4, $8
+; CHECK-NEXT:    i16x8.eq $push14=, $5, $8
+; CHECK-NEXT:    i8x16.shuffle $push16=, $pop15, $pop14, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push36=, 7
+; CHECK-NEXT:    i8x16.shl $push17=, $pop16, $pop36
+; CHECK-NEXT:    i32.const $push35=, 7
+; CHECK-NEXT:    i8x16.shr_s $push18=, $pop17, $pop35
+; CHECK-NEXT:    i8x16.all_true $push19=, $pop18
+; CHECK-NEXT:    i32.and $push20=, $pop13, $pop19
+; CHECK-NEXT:    i16x8.eq $push22=, $6, $8
+; CHECK-NEXT:    i16x8.eq $push21=, $7, $8
+; CHECK-NEXT:    i8x16.shuffle $push23=, $pop22, $pop21, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push34=, 7
+; CHECK-NEXT:    i8x16.shl $push24=, $pop23, $pop34
+; CHECK-NEXT:    i32.const $push33=, 7
+; CHECK-NEXT:    i8x16.shr_s $push25=, $pop24, $pop33
+; CHECK-NEXT:    i8x16.all_true $push26=, $pop25
+; CHECK-NEXT:    i32.and $push27=, $pop20, $pop26
+; CHECK-NEXT:    i32.const $push28=, -1
+; CHECK-NEXT:    i32.xor $push29=, $pop27, $pop28
+; CHECK-NEXT:    i64.extend_i32_u $push30=, $pop29
+; CHECK-NEXT:    i64.const $push31=, 1
+; CHECK-NEXT:    i64.and $push32=, $pop30, $pop31
+; CHECK-NEXT:    return $pop32
+  %1 = icmp eq <64 x i16> %v, zeroinitializer
+  %2 = bitcast <64 x i1> %1 to i64
+  %3 = icmp ne i64 %2, -1
+  %conv3 = zext i1 %3 to i64
+  ret i64 %conv3
+}
+
+define i32 @all_true_big_v32i32(<32 x i32> %v) {
+; CHECK-LABEL: all_true_big_v32i32:
+; CHECK:         .functype all_true_big_v32i32 (v128, v128, v128, v128, v128, v128, v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push54=, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push53=, $8=, $pop54
+; CHECK-NEXT:    i32x4.eq $push21=, $0, $pop53
+; CHECK-NEXT:    i32x4.eq $push20=, $1, $8
+; CHECK-NEXT:    i8x16.shuffle $push22=, $pop21, $pop20, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push52=, $2, $8
+; CHECK-NEXT:    local.tee $push51=, $1=, $pop52
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop51, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 8, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 9, $pop25
+; CHECK-NEXT:    i32x4.extract_lane $push27=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push28=, $pop26, 10, $pop27
+; CHECK-NEXT:    i32x4.extract_lane $push29=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push30=, $pop28, 11, $pop29
+; CHECK-NEXT:    i32x4.eq $push50=, $3, $8
+; CHECK-NEXT:    local.tee $push49=, $1=, $pop50
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop49, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 12, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 13, $pop33
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 14, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 15, $pop37
+; CHECK-NEXT:    i8x16.all_true $push39=, $pop38
+; CHECK-NEXT:    i32x4.eq $push1=, $4, $8
+; CHECK-NEXT:    i32x4.eq $push0=, $5, $8
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push48=, $6, $8
+; CHECK-NEXT:    local.tee $push47=, $1=, $pop48
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop47, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 8, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 9, $pop5
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 10, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 11, $pop9
+; CHECK-NEXT:    i32x4.eq $push46=, $7, $8
+; CHECK-NEXT:    local.tee $push45=, $8=, $pop46
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop45, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 12, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $8, 1
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 13, $pop13
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $8, 2
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 14, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $8, 3
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 15, $pop17
+; CHECK-NEXT:    i8x16.all_true $push19=, $pop18
+; CHECK-NEXT:    i32.and $push40=, $pop39, $pop19
+; CHECK-NEXT:    i32.const $push41=, -1
+; CHECK-NEXT:    i32.xor $push42=, $pop40, $pop41
+; CHECK-NEXT:    i32.const $push43=, 1
+; CHECK-NEXT:    i32.and $push44=, $pop42, $pop43
+; CHECK-NEXT:    return $pop44
+  %1 = icmp eq <32 x i32> %v, zeroinitializer
+  %2 = bitcast <32 x i1> %1 to i32
+  %3 = icmp ne i32 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+define i64 @all_true_big_v64i32(<64 x i32> %v) {
+; CHECK-LABEL: all_true_big_v64i32:
+; CHECK:         .functype all_true_big_v64i32 (v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push105=, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push104=, $16=, $pop105
+; CHECK-NEXT:    i32x4.eq $push21=, $0, $pop104
+; CHECK-NEXT:    i32x4.eq $push20=, $1, $16
+; CHECK-NEXT:    i8x16.shuffle $push22=, $pop21, $pop20, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push103=, $2, $16
+; CHECK-NEXT:    local.tee $push102=, $1=, $pop103
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop102, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 8, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 9, $pop25
+; CHECK-NEXT:    i32x4.extract_lane $push27=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push28=, $pop26, 10, $pop27
+; CHECK-NEXT:    i32x4.extract_lane $push29=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push30=, $pop28, 11, $pop29
+; CHECK-NEXT:    i32x4.eq $push101=, $3, $16
+; CHECK-NEXT:    local.tee $push100=, $1=, $pop101
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop100, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 12, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 13, $pop33
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 14, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 15, $pop37
+; CHECK-NEXT:    i8x16.all_true $push39=, $pop38
+; CHECK-NEXT:    i32x4.eq $push1=, $4, $16
+; CHECK-NEXT:    i32x4.eq $push0=, $5, $16
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push99=, $6, $16
+; CHECK-NEXT:    local.tee $push98=, $1=, $pop99
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop98, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 8, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 9, $pop5
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 10, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 11, $pop9
+; CHECK-NEXT:    i32x4.eq $push97=, $7, $16
+; CHECK-NEXT:    local.tee $push96=, $1=, $pop97
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop96, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 12, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 13, $pop13
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 14, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 15, $pop17
+; CHECK-NEXT:    i8x16.all_true $push19=, $pop18
+; CHECK-NEXT:    i32.and $push40=, $pop39, $pop19
+; CHECK-NEXT:    i32x4.eq $push42=, $8, $16
+; CHECK-NEXT:    i32x4.eq $push41=, $9, $16
+; CHECK-NEXT:    i8x16.shuffle $push43=, $pop42, $pop41, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push95=, $10, $16
+; CHECK-NEXT:    local.tee $push94=, $1=, $pop95
+; CHECK-NEXT:    i32x4.extract_lane $push44=, $pop94, 0
+; CHECK-NEXT:    i8x16.replace_lane $push45=, $pop43, 8, $pop44
+; CHECK-NEXT:    i32x4.extract_lane $push46=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push47=, $pop45, 9, $pop46
+; CHECK-NEXT:    i32x4.extract_lane $push48=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push49=, $pop47, 10, $pop48
+; CHECK-NEXT:    i32x4.extract_lane $push50=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push51=, $pop49, 11, $pop50
+; CHECK-NEXT:    i32x4.eq $push93=, $11, $16
+; CHECK-NEXT:    local.tee $push92=, $1=, $pop93
+; CHECK-NEXT:    i32x4.extract_lane $push52=, $pop92, 0
+; CHECK-NEXT:    i8x16.replace_lane $push53=, $pop51, 12, $pop52
+; CHECK-NEXT:    i32x4.extract_lane $push54=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push55=, $pop53, 13, $pop54
+; CHECK-NEXT:    i32x4.extract_lane $push56=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push57=, $pop55, 14, $pop56
+; CHECK-NEXT:    i32x4.extract_lane $push58=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push59=, $pop57, 15, $pop58
+; CHECK-NEXT:    i8x16.all_true $push60=, $pop59
+; CHECK-NEXT:    i32.and $push61=, $pop40, $pop60
+; CHECK-NEXT:    i32x4.eq $push63=, $12, $16
+; CHECK-NEXT:    i32x4.eq $push62=, $13, $16
+; CHECK-NEXT:    i8x16.shuffle $push64=, $pop63, $pop62, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push91=, $14, $16
+; CHECK-NEXT:    local.tee $push90=, $1=, $pop91
+; CHECK-NEXT:    i32x4.extract_lane $push65=, $pop90, 0
+; CHECK-NEXT:    i8x16.replace_lane $push66=, $pop64, 8, $pop65
+; CHECK-NEXT:    i32x4.extract_lane $push67=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push68=, $pop66, 9, $pop67
+; CHECK-NEXT:    i32x4.extract_lane $push69=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push70=, $pop68, 10, $pop69
+; CHECK-NEXT:    i32x4.extract_lane $push71=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push72=, $pop70, 11, $pop71
+; CHECK-NEXT:    i32x4.eq $push89=, $15, $16
+; CHECK-NEXT:    local.tee $push88=, $16=, $pop89
+; CHECK-NEXT:    i32x4.extract_lane $push73=, $pop88, 0
+; CHECK-NEXT:    i8x16.replace_lane $push74=, $pop72, 12, $pop73
+; CHECK-NEXT:    i32x4.extract_lane $push75=, $16, 1
+; CHECK-NEXT:    i8x16.replace_lane $push76=, $pop74, 13, $pop75
+; CHECK-NEXT:    i32x4.extract_lane $push77=, $16, 2
+; CHECK-NEXT:    i8x16.replace_lane $push78=, $pop76, 14, $pop77
+; CHECK-NEXT:    i32x4.extract_lane $push79=, $16, 3
+; CHECK-NEXT:    i8x16.replace_lane $push80=, $pop78, 15, $pop79
+; CHECK-NEXT:    i8x16.all_true $push81=, $pop80
+; CHECK-NEXT:    i32.and $push82=, $pop61, $pop81
+; CHECK-NEXT:    i32.const $push83=, -1
+; CHECK-NEXT:    i32.xor $push84=, $pop82, $pop83
+; CHECK-NEXT:    i64.extend_i32_u $push85=, $pop84
+; CHECK-NEXT:    i64.const $push86=, 1
+; CHECK-NEXT:    i64.and $push87=, $pop85, $pop86
+; CHECK-NEXT:    return $pop87
+  %1 = icmp eq <64 x i32> %v, zeroinitializer
+  %2 = bitcast <64 x i1> %1 to i64
+  %3 = icmp ne i64 %2, -1
+  %conv3 = zext i1 %3 to i64
+  ret i64 %conv3
+}
+
+define i32 @all_true_big_v32i64(<32 x i64> %v) {
+; CHECK-LABEL: all_true_big_v32i64:
+; CHECK:         .functype all_true_big_v32i64 (v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push86=, 0, 0
+; CHECK-NEXT:    local.tee $push85=, $16=, $pop86
+; CHECK-NEXT:    i64x2.eq $push29=, $0, $pop85
+; CHECK-NEXT:    i64x2.eq $push28=, $1, $16
+; CHECK-NEXT:    i8x16.shuffle $push30=, $pop29, $pop28, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push84=, $2, $16
+; CHECK-NEXT:    local.tee $push83=, $1=, $pop84
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop83, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 4, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 5, $pop33
+; CHECK-NEXT:    i64x2.eq $push82=, $3, $16
+; CHECK-NEXT:    local.tee $push81=, $1=, $pop82
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $pop81, 0
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 6, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 7, $pop37
+; CHECK-NEXT:    i64x2.eq $push80=, $4, $16
+; CHECK-NEXT:    local.tee $push79=, $1=, $pop80
+; CHECK-NEXT:    i32x4.extract_lane $push39=, $pop79, 0
+; CHECK-NEXT:    i8x16.replace_lane $push40=, $pop38, 8, $pop39
+; CHECK-NEXT:    i32x4.extract_lane $push41=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push42=, $pop40, 9, $pop41
+; CHECK-NEXT:    i64x2.eq $push78=, $5, $16
+; CHECK-NEXT:    local.tee $push77=, $1=, $pop78
+; CHECK-NEXT:    i32x4.extract_lane $push43=, $pop77, 0
+; CHECK-NEXT:    i8x16.replace_lane $push44=, $pop42, 10, $pop43
+; CHECK-NEXT:    i32x4.extract_lane $push45=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push46=, $pop44, 11, $pop45
+; CHECK-NEXT:    i64x2.eq $push76=, $6, $16
+; CHECK-NEXT:    local.tee $push75=, $1=, $pop76
+; CHECK-NEXT:    i32x4.extract_lane $push47=, $pop75, 0
+; CHECK-NEXT:    i8x16.replace_lane $push48=, $pop46, 12, $pop47
+; CHECK-NEXT:    i32x4.extract_lane $push49=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push50=, $pop48, 13, $pop49
+; CHECK-NEXT:    i64x2.eq $push74=, $7, $16
+; CHECK-NEXT:    local.tee $push73=, $1=, $pop74
+; CHECK-NEXT:    i32x4.extract_lane $push51=, $pop73, 0
+; CHECK-NEXT:    i8x16.replace_lane $push52=, $pop50, 14, $pop51
+; CHECK-NEXT:    i32x4.extract_lane $push53=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push54=, $pop52, 15, $pop53
+; CHECK-NEXT:    i8x16.all_true $push55=, $pop54
+; CHECK-NEXT:    i64x2.eq $push1=, $8, $16
+; CHECK-NEXT:    i64x2.eq $push0=, $9, $16
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push72=, $10, $16
+; CHECK-NEXT:    local.tee $push71=, $9=, $pop72
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop71, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 4, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 5, $pop5
+; CHECK-NEXT:    i64x2.eq $push70=, $11, $16
+; CHECK-NEXT:    local.tee $push69=, $9=, $pop70
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $pop69, 0
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 6, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 7, $pop9
+; CHECK-NEXT:    i64x2.eq $push68=, $12, $16
+; CHECK-NEXT:    local.tee $push67=, $9=, $pop68
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop67, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 8, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 9, $pop13
+; CHECK-NEXT:    i64x2.eq $push66=, $13, $16
+; CHECK-NEXT:    local.tee $push65=, $9=, $pop66
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $pop65, 0
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 10, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 11, $pop17
+; CHECK-NEXT:    i64x2.eq $push64=, $14, $16
+; CHECK-NEXT:    local.tee $push63=, $9=, $pop64
+; CHECK-NEXT:    i32x4.extract_lane $push19=, $pop63, 0
+; CHECK-NEXT:    i8x16.replace_lane $push20=, $pop18, 12, $pop19
+; CHECK-NEXT:    i32x4.extract_lane $push21=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push22=, $pop20, 13, $pop21
+; CHECK-NEXT:    i64x2.eq $push62=, $15, $16
+; CHECK-NEXT:    local.tee $push61=, $16=, $pop62
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop61, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 14, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $16, 2
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 15, $pop25
+; CHECK-NEXT:    i8x16.all_true $push27=, $pop26
+; CHECK-NEXT:    i32.and $push56=, $pop55, $pop27
+; CHECK-NEXT:    i32.const $push57=, -1
+; CHECK-NEXT:    i32.xor $push58=, $pop56, $pop57
+; CHECK-NEXT:    i32.const $push59=, 1
+; CHECK-NEXT:    i32.and $push60=, $pop58, $pop59
+; CHECK-NEXT:    return $pop60
+  %1 = icmp eq <32 x i64> %v, zeroinitializer
+  %2 = bitcast <32 x i1> %1 to i32
+  %3 = icmp ne i32 %2, -1
+  %conv3 = zext i1 %3 to i32
+  ret i32 %conv3
+}
+
+define i64 @all_true_big_v64i64(<64 x i64> %v) {
+; CHECK-LABEL: all_true_big_v64i64:
+; CHECK:         .functype all_true_big_v64i64 (v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push169=, 0, 0
+; CHECK-NEXT:    local.tee $push168=, $32=, $pop169
+; CHECK-NEXT:    i64x2.eq $push29=, $0, $pop168
+; CHECK-NEXT:    i64x2.eq $push28=, $1, $32
+; CHECK-NEXT:    i8x16.shuffle $push30=, $pop29, $pop28, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push167=, $2, $32
+; CHECK-NEXT:    local.tee $push166=, $1=, $pop167
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop166, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 4, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 5, $pop33
+; CHECK-NEXT:    i64x2.eq $push165=, $3, $32
+; CHECK-NEXT:    local.tee $push164=, $1=, $pop165
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $pop164, 0
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 6, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 7, $pop37
+; CHECK-NEXT:    i64x2.eq $push163=, $4, $32
+; CHECK-NEXT:    local.tee $push162=, $1=, $pop163
+; CHECK-NEXT:    i32x4.extract_lane $push39=, $pop162, 0
+; CHECK-NEXT:    i8x16.replace_lane $push40=, $pop38, 8, $pop39
+; CHECK-NEXT:    i32x4.extract_lane $push41=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push42=, $pop40, 9, $pop41
+; CHECK-NEXT:    i64x2.eq $push161=, $5, $32
+; CHECK-NEXT:    local.tee $push160=, $1=, $pop161
+; CHECK-NEXT:    i32x4.extract_lane $push43=, $pop160, 0
+; CHECK-NEXT:    i8x16.replace_lane $push44=, $pop42, 10, $pop43
+; CHECK-NEXT:    i32x4.extract_lane $push45=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push46=, $pop44, 11, $pop45
+; CHECK-NEXT:    i64x2.eq $push159=, $6, $32
+; CHECK-NEXT:    local.tee $push158=, $1=, $pop159
+; CHECK-NEXT:    i32x4.extract_lane $push47=, $pop158, 0
+; CHECK-NEXT:    i8x16.replace_lane $push48=, $pop46, 12, $pop47
+; CHECK-NEXT:    i32x4.extract_lane $push49=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push50=, $pop48, 13, $pop49
+; CHECK-NEXT:    i64x2.eq $push157=, $7, $32
+; CHECK-NEXT:    local.tee $push156=, $1=, $pop157
+; CHECK-NEXT:    i32x4.extract_lane $push51=, $pop156, 0
+; CHECK-NEXT:    i8x16.replace_lane $push52=, $pop50, 14, $pop51
+; CHECK-NEXT:    i32x4.extract_lane $push53=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push54=, $pop52, 15, $pop53
+; CHECK-NEXT:    i8x16.all_true $push55=, $pop54
+; CHECK-NEXT:    i64x2.eq $push1=, $8, $32
+; CHECK-NEXT:    i64x2.eq $push0=, $9, $32
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push155=, $10, $32
+; CHECK-NEXT:    local.tee $push154=, $9=, $pop155
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop154, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 4, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 5, $pop5
+; CHECK-NEXT:    i64x2.eq $push153=, $11, $32
+; CHECK-NEXT:    local.tee $push152=, $9=, $pop153
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $pop152, 0
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 6, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 7, $pop9
+; CHECK-NEXT:    i64x2.eq $push151=, $12, $32
+; CHECK-NEXT:    local.tee $push150=, $9=, $pop151
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop150, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 8, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 9, $pop13
+; CHECK-NEXT:    i64x2.eq $push149=, $13, $32
+; CHECK-NEXT:    local.tee $push148=, $9=, $pop149
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $pop148, 0
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 10, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 11, $pop17
+; CHECK-NEXT:    i64x2.eq $push147=, $14, $32
+; CHECK-NEXT:    local.tee $push146=, $9=, $pop147
+; CHECK-NEXT:    i32x4.extract_lane $push19=, $pop146, 0
+; CHECK-NEXT:    i8x16.replace_lane $push20=, $pop18, 12, $pop19
+; CHECK-NEXT:    i32x4.extract_lane $push21=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push22=, $pop20, 13, $pop21
+; CHECK-NEXT:    i64x2.eq $push145=, $15, $32
+; CHECK-NEXT:    local.tee $push144=, $9=, $pop145
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop144, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 14, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 15, $pop25
+; CHECK-NEXT:    i8x16.all_true $push27=, $pop26
+; CHECK-NEXT:    i32.and $push56=, $pop55, $pop27
+; CHECK-NEXT:    i64x2.eq $push58=, $16, $32
+; CHECK-NEXT:    i64x2.eq $push57=, $17, $32
+; CHECK-NEXT:    i8x16.shuffle $push59=, $pop58, $pop57, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push143=, $18, $32
+; CHECK-NEXT:    local.tee $push142=, $9=, $pop143
+; CHECK-NEXT:    i32x4.extract_lane $push60=, $pop142, 0
+; CHECK-NEXT:    i8x16.replace_lane $push61=, $pop59, 4, $pop60
+; CHECK-NEXT:    i32x4.extract_lane $push62=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push63=, $pop61, 5, $pop62
+; CHECK-NEXT:    i64x2.eq $push141=, $19, $32
+; CHECK-NEXT:    local.tee $push140=, $9=, $pop141
+; CHECK-NEXT:    i32x4.extract_lane $push64=, $pop140, 0
+; CHECK-NEXT:    i8x16.replace_lane $push65=, $pop63, 6, $pop64
+; CHECK-NEXT:    i32x4.extract_lane $push66=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push67=, $pop65, 7, $pop66
+; CHECK-NEXT:    i64x2.eq $push139=, $20, $32
+; CHECK-NEXT:    local.tee $push138=, $9=, $pop139
+; CHECK-NEXT:    i32x4.extract_lane $push68=, $pop138, 0
+; CHECK-NEXT:    i8x16.replace_lane $push69=, $pop67, 8, $pop68
+; CHECK-NEXT:    i32x4.extract_lane $push70=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push71=, $pop69, 9, $pop70
+; CHECK-NEXT:    i64x2.eq $push137=, $21, $32
+; CHECK-NEXT:    local.tee $push136=, $9=, $pop137
+; CHECK-NEXT:    i32x4.extract_lane $push72=, $pop136, 0
+; CHECK-NEXT:    i8x16.replace_lane $push73=, $pop71, 10, $pop72
+; CHECK-NEXT:    i32x4.extract_lane $push74=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push75=, $pop73, 11, $pop74
+; CHECK-NEXT:    i64x2.eq $push135=, $22, $32
+; CHECK-NEXT:    local.tee $push134=, $9=, $pop135
+; CHECK-NEXT:    i32x4.extract_lane $push76=, $pop134, 0
+; CHECK-NEXT:    i8x16.replace_lane $push77=, $pop75, 12, $pop76
+; CHECK-NEXT:    i32x4.extract_lane $push78=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push79=, $pop77, 13, $pop78
+; CHECK-NEXT:    i64x2.eq $push133=, $23, $32
+; CHECK-NEXT:    local.tee $push132=, $9=, $pop133
+; CHECK-NEXT:    i32x4.extract_lane $push80=, $pop132, 0
+; CHECK-NEXT:    i8x16.replace_lane $push81=, $pop79, 14, $pop80
+; CHECK-NEXT:    i32x4.extract_lane $push82=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push83=, $pop81, 15, $pop82
+; CHECK-NEXT:    i8x16.all_true $push84=, $pop83
+; CHECK-NEXT:    i32.and $push85=, $pop56, $pop84
+; CHECK-NEXT:    i64x2.eq $push87=, $24, $32
+; CHECK-NEXT:    i64x2.eq $push86=, $25, $32
+; CHECK-NEXT:    i8x16.shuffle $push88=, $pop87, $pop86, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push131=, $26, $32
+; CHECK-NEXT:    local.tee $push130=, $9=, $pop131
+; CHECK-NEXT:    i32x4.extract_lane $push89=, $pop130, 0
+; CHECK-NEXT:    i8x16.replace_lane $push90=, $pop88, 4, $pop89
+; CHECK-NEXT:    i32x4.extract_lane $push91=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push92=, $pop90, 5, $pop91
+; CHECK-NEXT:    i64x2.eq $push129=, $27, $32
+; CHECK-NEXT:    local.tee $push128=, $9=, $pop129
+; CHECK-NEXT:    i32x4.extract_lane $push93=, $pop128, 0
+; CHECK-NEXT:    i8x16.replace_lane $push94=, $pop92, 6, $pop93
+; CHECK-NEXT:    i32x4.extract_lane $push95=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push96=, $pop94, 7, $pop95
+; CHECK-NEXT:    i64x2.eq $push127=, $28, $32
+; CHECK-NEXT:    local.tee $push126=, $9=, $pop127
+; CHECK-NEXT:    i32x4.extract_lane $push97=, $pop126, 0
+; CHECK-NEXT:    i8x16.replace_lane $push98=, $pop96, 8, $pop97
+; CHECK-NEXT:    i32x4.extract_lane $push99=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push100=, $pop98, 9, $pop99
+; CHECK-NEXT:    i64x2.eq $push125=, $29, $32
+; CHECK-NEXT:    local.tee $push124=, $9=, $pop125
+; CHECK-NEXT:    i32x4.extract_lane $push101=, $pop124, 0
+; CHECK-NEXT:    i8x16.replace_lane $push102=, $pop100, 10, $pop101
+; CHECK-NEXT:    i32x4.extract_lane $push103=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push104=, $pop102, 11, $pop103
+; CHECK-NEXT:    i64x2.eq $push123=, $30, $32
+; CHECK-NEXT:    local.tee $push122=, $9=, $pop123
+; CHECK-NEXT:    i32x4.extract_lane $push105=, $pop122, 0
+; CHECK-NEXT:    i8x16.replace_lane $push106=, $pop104, 12, $pop105
+; CHECK-NEXT:    i32x4.extract_lane $push107=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push108=, $pop106, 13, $pop107
+; CHECK-NEXT:    i64x2.eq $push121=, $31, $32
+; CHECK-NEXT:    local.tee $push120=, $32=, $pop121
+; CHECK-NEXT:    i32x4.extract_lane $push109=, $pop120, 0
+; CHECK-NEXT:    i8x16.replace_lane $push110=, $pop108, 14, $pop109
+; CHECK-NEXT:    i32x4.extract_lane $push111=, $32, 2
+; CHECK-NEXT:    i8x16.replace_lane $push112=, $pop110, 15, $pop111
+; CHECK-NEXT:    i8x16.all_true $push113=, $pop112
+; CHECK-NEXT:    i32.and $push114=, $pop85, $pop113
+; CHECK-NEXT:    i32.const $push115=, -1
+; CHECK-NEXT:    i32.xor $push116=, $pop114, $pop115
+; CHECK-NEXT:    i64.extend_i32_u $push117=, $pop116
+; CHECK-NEXT:    i64.const $push118=, 1
+; CHECK-NEXT:    i64.and $push119=, $pop117, $pop118
+; CHECK-NEXT:    return $pop119
+  %1 = icmp eq <64 x i64> %v, zeroinitializer ; per-lane mask: a lane is true where %v is zero
+  %2 = bitcast <64 x i1> %1 to i64 ; pack the 64 i1 lanes into one i64 bitmask
+  %3 = icmp ne i64 %2, -1 ; true when the mask is not all-ones, i.e. the inverted all-true form
+  %conv3 = zext i1 %3 to i64 ; widen the i1 result to the i64 return type
+  ret i64 %conv3
+}
+
+define i32 @any_true_big_v32i16(<32 x i16> %v) {
+; CHECK-LABEL: any_true_big_v32i16:
+; CHECK:         .functype any_true_big_v32i16 (v128, v128, v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push20=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push19=, $4=, $pop20
+; CHECK-NEXT:    i16x8.eq $push8=, $0, $pop19
+; CHECK-NEXT:    i16x8.eq $push7=, $1, $4
+; CHECK-NEXT:    i8x16.shuffle $push9=, $pop8, $pop7, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push3=, 7
+; CHECK-NEXT:    i8x16.shl $push10=, $pop9, $pop3
+; CHECK-NEXT:    i32.const $push18=, 7
+; CHECK-NEXT:    i8x16.shr_s $push11=, $pop10, $pop18
+; CHECK-NEXT:    v128.any_true $push12=, $pop11
+; CHECK-NEXT:    i16x8.eq $push1=, $2, $4
+; CHECK-NEXT:    i16x8.eq $push0=, $3, $4
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push17=, 7
+; CHECK-NEXT:    i8x16.shl $push4=, $pop2, $pop17
+; CHECK-NEXT:    i32.const $push16=, 7
+; CHECK-NEXT:    i8x16.shr_s $push5=, $pop4, $pop16
+; CHECK-NEXT:    v128.any_true $push6=, $pop5
+; CHECK-NEXT:    i32.or $push13=, $pop12, $pop6
+; CHECK-NEXT:    i32.const $push14=, 1
+; CHECK-NEXT:    i32.and $push15=, $pop13, $pop14
+; CHECK-NEXT:    return $pop15
+  %1 = icmp eq <32 x i16> %v, zeroinitializer ; per-lane mask: a lane is true where %v is zero
+  %2 = bitcast <32 x i1> %1 to i32 ; pack the 32 i1 lanes into one i32 bitmask
+  %3 = icmp ne i32 %2, 0 ; true when at least one mask bit is set (any-true)
+  %conv3 = zext i1 %3 to i32 ; widen the i1 result to the i32 return type
+  ret i32 %conv3
+}
+
+define i64 @any_true_big_v64i16(<64 x i16> %v) {
+; CHECK-LABEL: any_true_big_v64i16:
+; CHECK:         .functype any_true_big_v64i16 (v128, v128, v128, v128, v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push39=, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push38=, $8=, $pop39
+; CHECK-NEXT:    i16x8.eq $push8=, $0, $pop38
+; CHECK-NEXT:    i16x8.eq $push7=, $1, $8
+; CHECK-NEXT:    i8x16.shuffle $push9=, $pop8, $pop7, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push3=, 7
+; CHECK-NEXT:    i8x16.shl $push10=, $pop9, $pop3
+; CHECK-NEXT:    i32.const $push37=, 7
+; CHECK-NEXT:    i8x16.shr_s $push11=, $pop10, $pop37
+; CHECK-NEXT:    v128.any_true $push12=, $pop11
+; CHECK-NEXT:    i16x8.eq $push1=, $2, $8
+; CHECK-NEXT:    i16x8.eq $push0=, $3, $8
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push36=, 7
+; CHECK-NEXT:    i8x16.shl $push4=, $pop2, $pop36
+; CHECK-NEXT:    i32.const $push35=, 7
+; CHECK-NEXT:    i8x16.shr_s $push5=, $pop4, $pop35
+; CHECK-NEXT:    v128.any_true $push6=, $pop5
+; CHECK-NEXT:    i32.or $push13=, $pop12, $pop6
+; CHECK-NEXT:    i16x8.eq $push15=, $4, $8
+; CHECK-NEXT:    i16x8.eq $push14=, $5, $8
+; CHECK-NEXT:    i8x16.shuffle $push16=, $pop15, $pop14, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push34=, 7
+; CHECK-NEXT:    i8x16.shl $push17=, $pop16, $pop34
+; CHECK-NEXT:    i32.const $push33=, 7
+; CHECK-NEXT:    i8x16.shr_s $push18=, $pop17, $pop33
+; CHECK-NEXT:    v128.any_true $push19=, $pop18
+; CHECK-NEXT:    i32.or $push20=, $pop13, $pop19
+; CHECK-NEXT:    i16x8.eq $push22=, $6, $8
+; CHECK-NEXT:    i16x8.eq $push21=, $7, $8
+; CHECK-NEXT:    i8x16.shuffle $push23=, $pop22, $pop21, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; CHECK-NEXT:    i32.const $push32=, 7
+; CHECK-NEXT:    i8x16.shl $push24=, $pop23, $pop32
+; CHECK-NEXT:    i32.const $push31=, 7
+; CHECK-NEXT:    i8x16.shr_s $push25=, $pop24, $pop31
+; CHECK-NEXT:    v128.any_true $push26=, $pop25
+; CHECK-NEXT:    i32.or $push27=, $pop20, $pop26
+; CHECK-NEXT:    i64.extend_i32_u $push28=, $pop27
+; CHECK-NEXT:    i64.const $push29=, 1
+; CHECK-NEXT:    i64.and $push30=, $pop28, $pop29
+; CHECK-NEXT:    return $pop30
+  %1 = icmp eq <64 x i16> %v, zeroinitializer ; per-lane mask: a lane is true where %v is zero
+  %2 = bitcast <64 x i1> %1 to i64 ; pack the 64 i1 lanes into one i64 bitmask
+  %3 = icmp ne i64 %2, 0 ; true when at least one mask bit is set (any-true)
+  %conv3 = zext i1 %3 to i64 ; widen the i1 result to the i64 return type
+  ret i64 %conv3
+}
+
+define i32 @any_true_big_v32i32(<32 x i32> %v) {
+; CHECK-LABEL: any_true_big_v32i32:
+; CHECK:         .functype any_true_big_v32i32 (v128, v128, v128, v128, v128, v128, v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push52=, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push51=, $8=, $pop52
+; CHECK-NEXT:    i32x4.eq $push21=, $0, $pop51
+; CHECK-NEXT:    i32x4.eq $push20=, $1, $8
+; CHECK-NEXT:    i8x16.shuffle $push22=, $pop21, $pop20, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push50=, $2, $8
+; CHECK-NEXT:    local.tee $push49=, $1=, $pop50
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop49, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 8, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 9, $pop25
+; CHECK-NEXT:    i32x4.extract_lane $push27=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push28=, $pop26, 10, $pop27
+; CHECK-NEXT:    i32x4.extract_lane $push29=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push30=, $pop28, 11, $pop29
+; CHECK-NEXT:    i32x4.eq $push48=, $3, $8
+; CHECK-NEXT:    local.tee $push47=, $1=, $pop48
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop47, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 12, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 13, $pop33
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 14, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 15, $pop37
+; CHECK-NEXT:    v128.any_true $push39=, $pop38
+; CHECK-NEXT:    i32x4.eq $push1=, $4, $8
+; CHECK-NEXT:    i32x4.eq $push0=, $5, $8
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push46=, $6, $8
+; CHECK-NEXT:    local.tee $push45=, $1=, $pop46
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop45, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 8, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 9, $pop5
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 10, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 11, $pop9
+; CHECK-NEXT:    i32x4.eq $push44=, $7, $8
+; CHECK-NEXT:    local.tee $push43=, $8=, $pop44
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop43, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 12, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $8, 1
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 13, $pop13
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $8, 2
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 14, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $8, 3
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 15, $pop17
+; CHECK-NEXT:    v128.any_true $push19=, $pop18
+; CHECK-NEXT:    i32.or $push40=, $pop39, $pop19
+; CHECK-NEXT:    i32.const $push41=, 1
+; CHECK-NEXT:    i32.and $push42=, $pop40, $pop41
+; CHECK-NEXT:    return $pop42
+  %1 = icmp eq <32 x i32> %v, zeroinitializer ; per-lane mask: a lane is true where %v is zero
+  %2 = bitcast <32 x i1> %1 to i32 ; pack the 32 i1 lanes into one i32 bitmask
+  %3 = icmp ne i32 %2, 0 ; true when at least one mask bit is set (any-true)
+  %conv3 = zext i1 %3 to i32 ; widen the i1 result to the i32 return type
+  ret i32 %conv3
+}
+
+define i64 @any_true_big_v64i32(<64 x i32> %v) {
+; CHECK-LABEL: any_true_big_v64i32:
+; CHECK:         .functype any_true_big_v64i32 (v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push103=, 0, 0, 0, 0
+; CHECK-NEXT:    local.tee $push102=, $16=, $pop103
+; CHECK-NEXT:    i32x4.eq $push21=, $0, $pop102
+; CHECK-NEXT:    i32x4.eq $push20=, $1, $16
+; CHECK-NEXT:    i8x16.shuffle $push22=, $pop21, $pop20, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push101=, $2, $16
+; CHECK-NEXT:    local.tee $push100=, $1=, $pop101
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop100, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 8, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 9, $pop25
+; CHECK-NEXT:    i32x4.extract_lane $push27=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push28=, $pop26, 10, $pop27
+; CHECK-NEXT:    i32x4.extract_lane $push29=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push30=, $pop28, 11, $pop29
+; CHECK-NEXT:    i32x4.eq $push99=, $3, $16
+; CHECK-NEXT:    local.tee $push98=, $1=, $pop99
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop98, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 12, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 13, $pop33
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 14, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 15, $pop37
+; CHECK-NEXT:    v128.any_true $push39=, $pop38
+; CHECK-NEXT:    i32x4.eq $push1=, $4, $16
+; CHECK-NEXT:    i32x4.eq $push0=, $5, $16
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push97=, $6, $16
+; CHECK-NEXT:    local.tee $push96=, $1=, $pop97
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop96, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 8, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 9, $pop5
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 10, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 11, $pop9
+; CHECK-NEXT:    i32x4.eq $push95=, $7, $16
+; CHECK-NEXT:    local.tee $push94=, $1=, $pop95
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop94, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 12, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 13, $pop13
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 14, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 15, $pop17
+; CHECK-NEXT:    v128.any_true $push19=, $pop18
+; CHECK-NEXT:    i32.or $push40=, $pop39, $pop19
+; CHECK-NEXT:    i32x4.eq $push42=, $8, $16
+; CHECK-NEXT:    i32x4.eq $push41=, $9, $16
+; CHECK-NEXT:    i8x16.shuffle $push43=, $pop42, $pop41, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push93=, $10, $16
+; CHECK-NEXT:    local.tee $push92=, $1=, $pop93
+; CHECK-NEXT:    i32x4.extract_lane $push44=, $pop92, 0
+; CHECK-NEXT:    i8x16.replace_lane $push45=, $pop43, 8, $pop44
+; CHECK-NEXT:    i32x4.extract_lane $push46=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push47=, $pop45, 9, $pop46
+; CHECK-NEXT:    i32x4.extract_lane $push48=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push49=, $pop47, 10, $pop48
+; CHECK-NEXT:    i32x4.extract_lane $push50=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push51=, $pop49, 11, $pop50
+; CHECK-NEXT:    i32x4.eq $push91=, $11, $16
+; CHECK-NEXT:    local.tee $push90=, $1=, $pop91
+; CHECK-NEXT:    i32x4.extract_lane $push52=, $pop90, 0
+; CHECK-NEXT:    i8x16.replace_lane $push53=, $pop51, 12, $pop52
+; CHECK-NEXT:    i32x4.extract_lane $push54=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push55=, $pop53, 13, $pop54
+; CHECK-NEXT:    i32x4.extract_lane $push56=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push57=, $pop55, 14, $pop56
+; CHECK-NEXT:    i32x4.extract_lane $push58=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push59=, $pop57, 15, $pop58
+; CHECK-NEXT:    v128.any_true $push60=, $pop59
+; CHECK-NEXT:    i32.or $push61=, $pop40, $pop60
+; CHECK-NEXT:    i32x4.eq $push63=, $12, $16
+; CHECK-NEXT:    i32x4.eq $push62=, $13, $16
+; CHECK-NEXT:    i8x16.shuffle $push64=, $pop63, $pop62, 0, 4, 8, 12, 16, 20, 24, 28, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i32x4.eq $push89=, $14, $16
+; CHECK-NEXT:    local.tee $push88=, $1=, $pop89
+; CHECK-NEXT:    i32x4.extract_lane $push65=, $pop88, 0
+; CHECK-NEXT:    i8x16.replace_lane $push66=, $pop64, 8, $pop65
+; CHECK-NEXT:    i32x4.extract_lane $push67=, $1, 1
+; CHECK-NEXT:    i8x16.replace_lane $push68=, $pop66, 9, $pop67
+; CHECK-NEXT:    i32x4.extract_lane $push69=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push70=, $pop68, 10, $pop69
+; CHECK-NEXT:    i32x4.extract_lane $push71=, $1, 3
+; CHECK-NEXT:    i8x16.replace_lane $push72=, $pop70, 11, $pop71
+; CHECK-NEXT:    i32x4.eq $push87=, $15, $16
+; CHECK-NEXT:    local.tee $push86=, $16=, $pop87
+; CHECK-NEXT:    i32x4.extract_lane $push73=, $pop86, 0
+; CHECK-NEXT:    i8x16.replace_lane $push74=, $pop72, 12, $pop73
+; CHECK-NEXT:    i32x4.extract_lane $push75=, $16, 1
+; CHECK-NEXT:    i8x16.replace_lane $push76=, $pop74, 13, $pop75
+; CHECK-NEXT:    i32x4.extract_lane $push77=, $16, 2
+; CHECK-NEXT:    i8x16.replace_lane $push78=, $pop76, 14, $pop77
+; CHECK-NEXT:    i32x4.extract_lane $push79=, $16, 3
+; CHECK-NEXT:    i8x16.replace_lane $push80=, $pop78, 15, $pop79
+; CHECK-NEXT:    v128.any_true $push81=, $pop80
+; CHECK-NEXT:    i32.or $push82=, $pop61, $pop81
+; CHECK-NEXT:    i64.extend_i32_u $push83=, $pop82
+; CHECK-NEXT:    i64.const $push84=, 1
+; CHECK-NEXT:    i64.and $push85=, $pop83, $pop84
+; CHECK-NEXT:    return $pop85
+  %1 = icmp eq <64 x i32> %v, zeroinitializer ; per-lane mask: a lane is true where %v is zero
+  %2 = bitcast <64 x i1> %1 to i64 ; pack the 64 i1 lanes into one i64 bitmask
+  %3 = icmp ne i64 %2, 0 ; true when at least one mask bit is set (any-true)
+  %conv3 = zext i1 %3 to i64 ; widen the i1 result to the i64 return type
+  ret i64 %conv3
+}
+
+define i32 @any_true_big_v32i64(<32 x i64> %v) {
+; CHECK-LABEL: any_true_big_v32i64:
+; CHECK:         .functype any_true_big_v32i64 (v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128) -> (i32)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push84=, 0, 0
+; CHECK-NEXT:    local.tee $push83=, $16=, $pop84
+; CHECK-NEXT:    i64x2.eq $push29=, $0, $pop83
+; CHECK-NEXT:    i64x2.eq $push28=, $1, $16
+; CHECK-NEXT:    i8x16.shuffle $push30=, $pop29, $pop28, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push82=, $2, $16
+; CHECK-NEXT:    local.tee $push81=, $1=, $pop82
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop81, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 4, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 5, $pop33
+; CHECK-NEXT:    i64x2.eq $push80=, $3, $16
+; CHECK-NEXT:    local.tee $push79=, $1=, $pop80
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $pop79, 0
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 6, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 7, $pop37
+; CHECK-NEXT:    i64x2.eq $push78=, $4, $16
+; CHECK-NEXT:    local.tee $push77=, $1=, $pop78
+; CHECK-NEXT:    i32x4.extract_lane $push39=, $pop77, 0
+; CHECK-NEXT:    i8x16.replace_lane $push40=, $pop38, 8, $pop39
+; CHECK-NEXT:    i32x4.extract_lane $push41=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push42=, $pop40, 9, $pop41
+; CHECK-NEXT:    i64x2.eq $push76=, $5, $16
+; CHECK-NEXT:    local.tee $push75=, $1=, $pop76
+; CHECK-NEXT:    i32x4.extract_lane $push43=, $pop75, 0
+; CHECK-NEXT:    i8x16.replace_lane $push44=, $pop42, 10, $pop43
+; CHECK-NEXT:    i32x4.extract_lane $push45=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push46=, $pop44, 11, $pop45
+; CHECK-NEXT:    i64x2.eq $push74=, $6, $16
+; CHECK-NEXT:    local.tee $push73=, $1=, $pop74
+; CHECK-NEXT:    i32x4.extract_lane $push47=, $pop73, 0
+; CHECK-NEXT:    i8x16.replace_lane $push48=, $pop46, 12, $pop47
+; CHECK-NEXT:    i32x4.extract_lane $push49=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push50=, $pop48, 13, $pop49
+; CHECK-NEXT:    i64x2.eq $push72=, $7, $16
+; CHECK-NEXT:    local.tee $push71=, $1=, $pop72
+; CHECK-NEXT:    i32x4.extract_lane $push51=, $pop71, 0
+; CHECK-NEXT:    i8x16.replace_lane $push52=, $pop50, 14, $pop51
+; CHECK-NEXT:    i32x4.extract_lane $push53=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push54=, $pop52, 15, $pop53
+; CHECK-NEXT:    v128.any_true $push55=, $pop54
+; CHECK-NEXT:    i64x2.eq $push1=, $8, $16
+; CHECK-NEXT:    i64x2.eq $push0=, $9, $16
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push70=, $10, $16
+; CHECK-NEXT:    local.tee $push69=, $9=, $pop70
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop69, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 4, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 5, $pop5
+; CHECK-NEXT:    i64x2.eq $push68=, $11, $16
+; CHECK-NEXT:    local.tee $push67=, $9=, $pop68
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $pop67, 0
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 6, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 7, $pop9
+; CHECK-NEXT:    i64x2.eq $push66=, $12, $16
+; CHECK-NEXT:    local.tee $push65=, $9=, $pop66
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop65, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 8, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 9, $pop13
+; CHECK-NEXT:    i64x2.eq $push64=, $13, $16
+; CHECK-NEXT:    local.tee $push63=, $9=, $pop64
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $pop63, 0
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 10, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 11, $pop17
+; CHECK-NEXT:    i64x2.eq $push62=, $14, $16
+; CHECK-NEXT:    local.tee $push61=, $9=, $pop62
+; CHECK-NEXT:    i32x4.extract_lane $push19=, $pop61, 0
+; CHECK-NEXT:    i8x16.replace_lane $push20=, $pop18, 12, $pop19
+; CHECK-NEXT:    i32x4.extract_lane $push21=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push22=, $pop20, 13, $pop21
+; CHECK-NEXT:    i64x2.eq $push60=, $15, $16
+; CHECK-NEXT:    local.tee $push59=, $16=, $pop60
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop59, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 14, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $16, 2
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 15, $pop25
+; CHECK-NEXT:    v128.any_true $push27=, $pop26
+; CHECK-NEXT:    i32.or $push56=, $pop55, $pop27
+; CHECK-NEXT:    i32.const $push57=, 1
+; CHECK-NEXT:    i32.and $push58=, $pop56, $pop57
+; CHECK-NEXT:    return $pop58
+  %1 = icmp eq <32 x i64> %v, zeroinitializer ; per-lane mask: a lane is true where %v is zero
+  %2 = bitcast <32 x i1> %1 to i32 ; pack the 32 i1 lanes into one i32 bitmask
+  %3 = icmp ne i32 %2, 0 ; true when at least one mask bit is set (any-true)
+  %conv3 = zext i1 %3 to i32 ; widen the i1 result to the i32 return type
+  ret i32 %conv3
+}
+
+define i64 @any_true_big_v64i64(<64 x i64> %v) {
+; CHECK-LABEL: any_true_big_v64i64:
+; CHECK:         .functype any_true_big_v64i64 (v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128, v128) -> (i64)
+; CHECK-NEXT:  # %bb.0:
+; CHECK-NEXT:    v128.const $push167=, 0, 0
+; CHECK-NEXT:    local.tee $push166=, $32=, $pop167
+; CHECK-NEXT:    i64x2.eq $push29=, $0, $pop166
+; CHECK-NEXT:    i64x2.eq $push28=, $1, $32
+; CHECK-NEXT:    i8x16.shuffle $push30=, $pop29, $pop28, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push165=, $2, $32
+; CHECK-NEXT:    local.tee $push164=, $1=, $pop165
+; CHECK-NEXT:    i32x4.extract_lane $push31=, $pop164, 0
+; CHECK-NEXT:    i8x16.replace_lane $push32=, $pop30, 4, $pop31
+; CHECK-NEXT:    i32x4.extract_lane $push33=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push34=, $pop32, 5, $pop33
+; CHECK-NEXT:    i64x2.eq $push163=, $3, $32
+; CHECK-NEXT:    local.tee $push162=, $1=, $pop163
+; CHECK-NEXT:    i32x4.extract_lane $push35=, $pop162, 0
+; CHECK-NEXT:    i8x16.replace_lane $push36=, $pop34, 6, $pop35
+; CHECK-NEXT:    i32x4.extract_lane $push37=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push38=, $pop36, 7, $pop37
+; CHECK-NEXT:    i64x2.eq $push161=, $4, $32
+; CHECK-NEXT:    local.tee $push160=, $1=, $pop161
+; CHECK-NEXT:    i32x4.extract_lane $push39=, $pop160, 0
+; CHECK-NEXT:    i8x16.replace_lane $push40=, $pop38, 8, $pop39
+; CHECK-NEXT:    i32x4.extract_lane $push41=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push42=, $pop40, 9, $pop41
+; CHECK-NEXT:    i64x2.eq $push159=, $5, $32
+; CHECK-NEXT:    local.tee $push158=, $1=, $pop159
+; CHECK-NEXT:    i32x4.extract_lane $push43=, $pop158, 0
+; CHECK-NEXT:    i8x16.replace_lane $push44=, $pop42, 10, $pop43
+; CHECK-NEXT:    i32x4.extract_lane $push45=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push46=, $pop44, 11, $pop45
+; CHECK-NEXT:    i64x2.eq $push157=, $6, $32
+; CHECK-NEXT:    local.tee $push156=, $1=, $pop157
+; CHECK-NEXT:    i32x4.extract_lane $push47=, $pop156, 0
+; CHECK-NEXT:    i8x16.replace_lane $push48=, $pop46, 12, $pop47
+; CHECK-NEXT:    i32x4.extract_lane $push49=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push50=, $pop48, 13, $pop49
+; CHECK-NEXT:    i64x2.eq $push155=, $7, $32
+; CHECK-NEXT:    local.tee $push154=, $1=, $pop155
+; CHECK-NEXT:    i32x4.extract_lane $push51=, $pop154, 0
+; CHECK-NEXT:    i8x16.replace_lane $push52=, $pop50, 14, $pop51
+; CHECK-NEXT:    i32x4.extract_lane $push53=, $1, 2
+; CHECK-NEXT:    i8x16.replace_lane $push54=, $pop52, 15, $pop53
+; CHECK-NEXT:    v128.any_true $push55=, $pop54
+; CHECK-NEXT:    i64x2.eq $push1=, $8, $32
+; CHECK-NEXT:    i64x2.eq $push0=, $9, $32
+; CHECK-NEXT:    i8x16.shuffle $push2=, $pop1, $pop0, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push153=, $10, $32
+; CHECK-NEXT:    local.tee $push152=, $9=, $pop153
+; CHECK-NEXT:    i32x4.extract_lane $push3=, $pop152, 0
+; CHECK-NEXT:    i8x16.replace_lane $push4=, $pop2, 4, $pop3
+; CHECK-NEXT:    i32x4.extract_lane $push5=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push6=, $pop4, 5, $pop5
+; CHECK-NEXT:    i64x2.eq $push151=, $11, $32
+; CHECK-NEXT:    local.tee $push150=, $9=, $pop151
+; CHECK-NEXT:    i32x4.extract_lane $push7=, $pop150, 0
+; CHECK-NEXT:    i8x16.replace_lane $push8=, $pop6, 6, $pop7
+; CHECK-NEXT:    i32x4.extract_lane $push9=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push10=, $pop8, 7, $pop9
+; CHECK-NEXT:    i64x2.eq $push149=, $12, $32
+; CHECK-NEXT:    local.tee $push148=, $9=, $pop149
+; CHECK-NEXT:    i32x4.extract_lane $push11=, $pop148, 0
+; CHECK-NEXT:    i8x16.replace_lane $push12=, $pop10, 8, $pop11
+; CHECK-NEXT:    i32x4.extract_lane $push13=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push14=, $pop12, 9, $pop13
+; CHECK-NEXT:    i64x2.eq $push147=, $13, $32
+; CHECK-NEXT:    local.tee $push146=, $9=, $pop147
+; CHECK-NEXT:    i32x4.extract_lane $push15=, $pop146, 0
+; CHECK-NEXT:    i8x16.replace_lane $push16=, $pop14, 10, $pop15
+; CHECK-NEXT:    i32x4.extract_lane $push17=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push18=, $pop16, 11, $pop17
+; CHECK-NEXT:    i64x2.eq $push145=, $14, $32
+; CHECK-NEXT:    local.tee $push144=, $9=, $pop145
+; CHECK-NEXT:    i32x4.extract_lane $push19=, $pop144, 0
+; CHECK-NEXT:    i8x16.replace_lane $push20=, $pop18, 12, $pop19
+; CHECK-NEXT:    i32x4.extract_lane $push21=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push22=, $pop20, 13, $pop21
+; CHECK-NEXT:    i64x2.eq $push143=, $15, $32
+; CHECK-NEXT:    local.tee $push142=, $9=, $pop143
+; CHECK-NEXT:    i32x4.extract_lane $push23=, $pop142, 0
+; CHECK-NEXT:    i8x16.replace_lane $push24=, $pop22, 14, $pop23
+; CHECK-NEXT:    i32x4.extract_lane $push25=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push26=, $pop24, 15, $pop25
+; CHECK-NEXT:    v128.any_true $push27=, $pop26
+; CHECK-NEXT:    i32.or $push56=, $pop55, $pop27
+; CHECK-NEXT:    i64x2.eq $push58=, $16, $32
+; CHECK-NEXT:    i64x2.eq $push57=, $17, $32
+; CHECK-NEXT:    i8x16.shuffle $push59=, $pop58, $pop57, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push141=, $18, $32
+; CHECK-NEXT:    local.tee $push140=, $9=, $pop141
+; CHECK-NEXT:    i32x4.extract_lane $push60=, $pop140, 0
+; CHECK-NEXT:    i8x16.replace_lane $push61=, $pop59, 4, $pop60
+; CHECK-NEXT:    i32x4.extract_lane $push62=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push63=, $pop61, 5, $pop62
+; CHECK-NEXT:    i64x2.eq $push139=, $19, $32
+; CHECK-NEXT:    local.tee $push138=, $9=, $pop139
+; CHECK-NEXT:    i32x4.extract_lane $push64=, $pop138, 0
+; CHECK-NEXT:    i8x16.replace_lane $push65=, $pop63, 6, $pop64
+; CHECK-NEXT:    i32x4.extract_lane $push66=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push67=, $pop65, 7, $pop66
+; CHECK-NEXT:    i64x2.eq $push137=, $20, $32
+; CHECK-NEXT:    local.tee $push136=, $9=, $pop137
+; CHECK-NEXT:    i32x4.extract_lane $push68=, $pop136, 0
+; CHECK-NEXT:    i8x16.replace_lane $push69=, $pop67, 8, $pop68
+; CHECK-NEXT:    i32x4.extract_lane $push70=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push71=, $pop69, 9, $pop70
+; CHECK-NEXT:    i64x2.eq $push135=, $21, $32
+; CHECK-NEXT:    local.tee $push134=, $9=, $pop135
+; CHECK-NEXT:    i32x4.extract_lane $push72=, $pop134, 0
+; CHECK-NEXT:    i8x16.replace_lane $push73=, $pop71, 10, $pop72
+; CHECK-NEXT:    i32x4.extract_lane $push74=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push75=, $pop73, 11, $pop74
+; CHECK-NEXT:    i64x2.eq $push133=, $22, $32
+; CHECK-NEXT:    local.tee $push132=, $9=, $pop133
+; CHECK-NEXT:    i32x4.extract_lane $push76=, $pop132, 0
+; CHECK-NEXT:    i8x16.replace_lane $push77=, $pop75, 12, $pop76
+; CHECK-NEXT:    i32x4.extract_lane $push78=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push79=, $pop77, 13, $pop78
+; CHECK-NEXT:    i64x2.eq $push131=, $23, $32
+; CHECK-NEXT:    local.tee $push130=, $9=, $pop131
+; CHECK-NEXT:    i32x4.extract_lane $push80=, $pop130, 0
+; CHECK-NEXT:    i8x16.replace_lane $push81=, $pop79, 14, $pop80
+; CHECK-NEXT:    i32x4.extract_lane $push82=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push83=, $pop81, 15, $pop82
+; CHECK-NEXT:    v128.any_true $push84=, $pop83
+; CHECK-NEXT:    i32.or $push85=, $pop56, $pop84
+; CHECK-NEXT:    i64x2.eq $push87=, $24, $32
+; CHECK-NEXT:    i64x2.eq $push86=, $25, $32
+; CHECK-NEXT:    i8x16.shuffle $push88=, $pop87, $pop86, 0, 8, 16, 24, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; CHECK-NEXT:    i64x2.eq $push129=, $26, $32
+; CHECK-NEXT:    local.tee $push128=, $9=, $pop129
+; CHECK-NEXT:    i32x4.extract_lane $push89=, $pop128, 0
+; CHECK-NEXT:    i8x16.replace_lane $push90=, $pop88, 4, $pop89
+; CHECK-NEXT:    i32x4.extract_lane $push91=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push92=, $pop90, 5, $pop91
+; CHECK-NEXT:    i64x2.eq $push127=, $27, $32
+; CHECK-NEXT:    local.tee $push126=, $9=, $pop127
+; CHECK-NEXT:    i32x4.extract_lane $push93=, $pop126, 0
+; CHECK-NEXT:    i8x16.replace_lane $push94=, $pop92, 6, $pop93
+; CHECK-NEXT:    i32x4.extract_lane $push95=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push96=, $pop94, 7, $pop95
+; CHECK-NEXT:    i64x2.eq $push125=, $28, $32
+; CHECK-NEXT:    local.tee $push124=, $9=, $pop125
+; CHECK-NEXT:    i32x4.extract_lane $push97=, $pop124, 0
+; CHECK-NEXT:    i8x16.replace_lane $push98=, $pop96, 8, $pop97
+; CHECK-NEXT:    i32x4.extract_lane $push99=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push100=, $pop98, 9, $pop99
+; CHECK-NEXT:    i64x2.eq $push123=, $29, $32
+; CHECK-NEXT:    local.tee $push122=, $9=, $pop123
+; CHECK-NEXT:    i32x4.extract_lane $push101=, $pop122, 0
+; CHECK-NEXT:    i8x16.replace_lane $push102=, $pop100, 10, $pop101
+; CHECK-NEXT:    i32x4.extract_lane $push103=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push104=, $pop102, 11, $pop103
+; CHECK-NEXT:    i64x2.eq $push121=, $30, $32
+; CHECK-NEXT:    local.tee $push120=, $9=, $pop121
+; CHECK-NEXT:    i32x4.extract_lane $push105=, $pop120, 0
+; CHECK-NEXT:    i8x16.replace_lane $push106=, $pop104, 12, $pop105
+; CHECK-NEXT:    i32x4.extract_lane $push107=, $9, 2
+; CHECK-NEXT:    i8x16.replace_lane $push108=, $pop106, 13, $pop107
+; CHECK-NEXT:    i64x2.eq $push119=, $31, $32
+; CHECK-NEXT:    local.tee $push118=, $32=, $pop119
+; CHECK-NEXT:    i32x4.extract_lane $push109=, $pop118, 0
+; CHECK-NEXT:    i8x16.replace_lane $push110=, $pop108, 14, $pop109
+; CHECK-NEXT:    i32x4.extract_lane $push111=, $32, 2
+; CHECK-NEXT:    i8x16.replace_lane $push112=, $pop110, 15, $pop111
+; CHECK-NEXT:    v128.any_true $push113=, $pop112
+; CHECK-NEXT:    i32.or $push114=, $pop85, $pop113
+; CHECK-NEXT:    i64.extend_i32_u $push115=, $pop114
+; CHECK-NEXT:    i64.const $push116=, 1
+; CHECK-NEXT:    i64.and $push117=, $pop115, $pop116
+; CHECK-NEXT:    return $pop117
+  %1 = icmp eq <64 x i64> %v, zeroinitializer
+  %2 = bitcast <64 x i1> %1 to i64
+  %3 = icmp ne i64 %2, 0
+  %conv3 = zext i1 %3 to i64
+  ret i64 %conv3
+}
+
+
 attributes #0 = { "target-features"="+atomics" }
 
diff --git a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
index 70c6baf2be005..a6100dd08d7c6 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-vecreduce-bool.ll
@@ -304,775 +304,67 @@ define i1 @test_all_v8i8(<8 x i8> %x) {
   ret i1 %ret
 }
 
-;; TODO: Improve this codegen.
 define i1 @test_any_v64i8(<64 x i8> %x) {
 ; CHECK-LABEL: test_any_v64i8:
 ; CHECK:         .functype test_any_v64i8 (v128, v128, v128, v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    global.get $push264=, __stack_pointer
-; CHECK-NEXT:    i32.const $push265=, 16
-; CHECK-NEXT:    i32.sub $drop=, $pop264, $pop265
-; CHECK-NEXT:    i8x16.extract_lane_u $push254=, $0, 15
-; CHECK-NEXT:    i8x16.extract_lane_u $push253=, $2, 15
-; CHECK-NEXT:    i32.or $push255=, $pop254, $pop253
-; CHECK-NEXT:    i32.const $push133=, 15
-; CHECK-NEXT:    i32.shl $push256=, $pop255, $pop133
-; CHECK-NEXT:    i8x16.extract_lane_u $push195=, $0, 0
-; CHECK-NEXT:    i32.const $push1=, 1
-; CHECK-NEXT:    i32.and $push196=, $pop195, $pop1
-; CHECK-NEXT:    i8x16.extract_lane_u $push197=, $0, 1
-; CHECK-NEXT:    i32.const $push368=, 1
-; CHECK-NEXT:    i32.and $push198=, $pop197, $pop368
-; CHECK-NEXT:    i32.const $push367=, 1
-; CHECK-NEXT:    i32.shl $push199=, $pop198, $pop367
-; CHECK-NEXT:    i32.or $push200=, $pop196, $pop199
-; CHECK-NEXT:    i8x16.extract_lane_u $push201=, $0, 2
-; CHECK-NEXT:    i32.const $push366=, 1
-; CHECK-NEXT:    i32.and $push202=, $pop201, $pop366
-; CHECK-NEXT:    i32.const $push9=, 2
-; CHECK-NEXT:    i32.shl $push203=, $pop202, $pop9
-; CHECK-NEXT:    i32.or $push204=, $pop200, $pop203
-; CHECK-NEXT:    i8x16.extract_lane_u $push205=, $0, 3
-; CHECK-NEXT:    i32.const $push365=, 1
-; CHECK-NEXT:    i32.and $push206=, $pop205, $pop365
-; CHECK-NEXT:    i32.const $push14=, 3
-; CHECK-NEXT:    i32.shl $push207=, $pop206, $pop14
-; CHECK-NEXT:    i32.or $push208=, $pop204, $pop207
-; CHECK-NEXT:    i8x16.extract_lane_u $push209=, $0, 4
-; CHECK-NEXT:    i32.const $push364=, 1
-; CHECK-NEXT:    i32.and $push210=, $pop209, $pop364
-; CHECK-NEXT:    i32.const $push19=, 4
-; CHECK-NEXT:    i32.shl $push211=, $pop210, $pop19
-; CHECK-NEXT:    i32.or $push212=, $pop208, $pop211
-; CHECK-NEXT:    i8x16.extract_lane_u $push213=, $0, 5
-; CHECK-NEXT:    i32.const $push363=, 1
-; CHECK-NEXT:    i32.and $push214=, $pop213, $pop363
-; CHECK-NEXT:    i32.const $push24=, 5
-; CHECK-NEXT:    i32.shl $push215=, $pop214, $pop24
-; CHECK-NEXT:    i32.or $push216=, $pop212, $pop215
-; CHECK-NEXT:    i8x16.extract_lane_u $push217=, $0, 6
-; CHECK-NEXT:    i32.const $push362=, 1
-; CHECK-NEXT:    i32.and $push218=, $pop217, $pop362
-; CHECK-NEXT:    i32.const $push29=, 6
-; CHECK-NEXT:    i32.shl $push219=, $pop218, $pop29
-; CHECK-NEXT:    i32.or $push220=, $pop216, $pop219
-; CHECK-NEXT:    i8x16.extract_lane_u $push221=, $0, 7
-; CHECK-NEXT:    i32.const $push361=, 1
-; CHECK-NEXT:    i32.and $push222=, $pop221, $pop361
-; CHECK-NEXT:    i32.const $push34=, 7
-; CHECK-NEXT:    i32.shl $push223=, $pop222, $pop34
-; CHECK-NEXT:    i32.or $push224=, $pop220, $pop223
-; CHECK-NEXT:    i8x16.extract_lane_u $push225=, $0, 8
-; CHECK-NEXT:    i32.const $push360=, 1
-; CHECK-NEXT:    i32.and $push226=, $pop225, $pop360
-; CHECK-NEXT:    i32.const $push39=, 8
-; CHECK-NEXT:    i32.shl $push227=, $pop226, $pop39
-; CHECK-NEXT:    i32.or $push228=, $pop224, $pop227
-; CHECK-NEXT:    i8x16.extract_lane_u $push229=, $0, 9
-; CHECK-NEXT:    i32.const $push359=, 1
-; CHECK-NEXT:    i32.and $push230=, $pop229, $pop359
-; CHECK-NEXT:    i32.const $push44=, 9
-; CHECK-NEXT:    i32.shl $push231=, $pop230, $pop44
-; CHECK-NEXT:    i32.or $push232=, $pop228, $pop231
-; CHECK-NEXT:    i8x16.extract_lane_u $push233=, $0, 10
-; CHECK-NEXT:    i32.const $push358=, 1
-; CHECK-NEXT:    i32.and $push234=, $pop233, $pop358
-; CHECK-NEXT:    i32.const $push49=, 10
-; CHECK-NEXT:    i32.shl $push235=, $pop234, $pop49
-; CHECK-NEXT:    i32.or $push236=, $pop232, $pop235
-; CHECK-NEXT:    i8x16.extract_lane_u $push237=, $0, 11
-; CHECK-NEXT:    i32.const $push357=, 1
-; CHECK-NEXT:    i32.and $push238=, $pop237, $pop357
-; CHECK-NEXT:    i32.const $push54=, 11
-; CHECK-NEXT:    i32.shl $push239=, $pop238, $pop54
-; CHECK-NEXT:    i32.or $push240=, $pop236, $pop239
-; CHECK-NEXT:    i8x16.extract_lane_u $push241=, $0, 12
-; CHECK-NEXT:    i32.const $push356=, 1
-; CHECK-NEXT:    i32.and $push242=, $pop241, $pop356
-; CHECK-NEXT:    i32.const $push59=, 12
-; CHECK-NEXT:    i32.shl $push243=, $pop242, $pop59
-; CHECK-NEXT:    i32.or $push244=, $pop240, $pop243
-; CHECK-NEXT:    i8x16.extract_lane_u $push245=, $0, 13
-; CHECK-NEXT:    i32.const $push355=, 1
-; CHECK-NEXT:    i32.and $push246=, $pop245, $pop355
-; CHECK-NEXT:    i32.const $push64=, 13
-; CHECK-NEXT:    i32.shl $push247=, $pop246, $pop64
-; CHECK-NEXT:    i32.or $push248=, $pop244, $pop247
-; CHECK-NEXT:    i8x16.extract_lane_u $push249=, $0, 14
-; CHECK-NEXT:    i32.const $push354=, 1
-; CHECK-NEXT:    i32.and $push250=, $pop249, $pop354
-; CHECK-NEXT:    i32.const $push69=, 14
-; CHECK-NEXT:    i32.shl $push251=, $pop250, $pop69
-; CHECK-NEXT:    i32.or $push252=, $pop248, $pop251
-; CHECK-NEXT:    i32.or $push257=, $pop256, $pop252
-; CHECK-NEXT:    i8x16.extract_lane_u $push137=, $2, 0
-; CHECK-NEXT:    i32.const $push353=, 1
-; CHECK-NEXT:    i32.and $push138=, $pop137, $pop353
-; CHECK-NEXT:    i8x16.extract_lane_u $push139=, $2, 1
-; CHECK-NEXT:    i32.const $push352=, 1
-; CHECK-NEXT:    i32.and $push140=, $pop139, $pop352
-; CHECK-NEXT:    i32.const $push351=, 1
-; CHECK-NEXT:    i32.shl $push141=, $pop140, $pop351
-; CHECK-NEXT:    i32.or $push142=, $pop138, $pop141
-; CHECK-NEXT:    i8x16.extract_lane_u $push143=, $2, 2
-; CHECK-NEXT:    i32.const $push350=, 1
-; CHECK-NEXT:    i32.and $push144=, $pop143, $pop350
-; CHECK-NEXT:    i32.const $push349=, 2
-; CHECK-NEXT:    i32.shl $push145=, $pop144, $pop349
-; CHECK-NEXT:    i32.or $push146=, $pop142, $pop145
-; CHECK-NEXT:    i8x16.extract_lane_u $push147=, $2, 3
-; CHECK-NEXT:    i32.const $push348=, 1
-; CHECK-NEXT:    i32.and $push148=, $pop147, $pop348
-; CHECK-NEXT:    i32.const $push347=, 3
-; CHECK-NEXT:    i32.shl $push149=, $pop148, $pop347
-; CHECK-NEXT:    i32.or $push150=, $pop146, $pop149
-; CHECK-NEXT:    i8x16.extract_lane_u $push151=, $2, 4
-; CHECK-NEXT:    i32.const $push346=, 1
-; CHECK-NEXT:    i32.and $push152=, $pop151, $pop346
-; CHECK-NEXT:    i32.const $push345=, 4
-; CHECK-NEXT:    i32.shl $push153=, $pop152, $pop345
-; CHECK-NEXT:    i32.or $push154=, $pop150, $pop153
-; CHECK-NEXT:    i8x16.extract_lane_u $push155=, $2, 5
-; CHECK-NEXT:    i32.const $push344=, 1
-; CHECK-NEXT:    i32.and $push156=, $pop155, $pop344
-; CHECK-NEXT:    i32.const $push343=, 5
-; CHECK-NEXT:    i32.shl $push157=, $pop156, $pop343
-; CHECK-NEXT:    i32.or $push158=, $pop154, $pop157
-; CHECK-NEXT:    i8x16.extract_lane_u $push159=, $2, 6
-; CHECK-NEXT:    i32.const $push342=, 1
-; CHECK-NEXT:    i32.and $push160=, $pop159, $pop342
-; CHECK-NEXT:    i32.const $push341=, 6
-; CHECK-NEXT:    i32.shl $push161=, $pop160, $pop341
-; CHECK-NEXT:    i32.or $push162=, $pop158, $pop161
-; CHECK-NEXT:    i8x16.extract_lane_u $push163=, $2, 7
-; CHECK-NEXT:    i32.const $push340=, 1
-; CHECK-NEXT:    i32.and $push164=, $pop163, $pop340
-; CHECK-NEXT:    i32.const $push339=, 7
-; CHECK-NEXT:    i32.shl $push165=, $pop164, $pop339
-; CHECK-NEXT:    i32.or $push166=, $pop162, $pop165
-; CHECK-NEXT:    i8x16.extract_lane_u $push167=, $2, 8
-; CHECK-NEXT:    i32.const $push338=, 1
-; CHECK-NEXT:    i32.and $push168=, $pop167, $pop338
-; CHECK-NEXT:    i32.const $push337=, 8
-; CHECK-NEXT:    i32.shl $push169=, $pop168, $pop337
-; CHECK-NEXT:    i32.or $push170=, $pop166, $pop169
-; CHECK-NEXT:    i8x16.extract_lane_u $push171=, $2, 9
-; CHECK-NEXT:    i32.const $push336=, 1
-; CHECK-NEXT:    i32.and $push172=, $pop171, $pop336
-; CHECK-NEXT:    i32.const $push335=, 9
-; CHECK-NEXT:    i32.shl $push173=, $pop172, $pop335
-; CHECK-NEXT:    i32.or $push174=, $pop170, $pop173
-; CHECK-NEXT:    i8x16.extract_lane_u $push175=, $2, 10
-; CHECK-NEXT:    i32.const $push334=, 1
-; CHECK-NEXT:    i32.and $push176=, $pop175, $pop334
-; CHECK-NEXT:    i32.const $push333=, 10
-; CHECK-NEXT:    i32.shl $push177=, $pop176, $pop333
-; CHECK-NEXT:    i32.or $push178=, $pop174, $pop177
-; CHECK-NEXT:    i8x16.extract_lane_u $push179=, $2, 11
-; CHECK-NEXT:    i32.const $push332=, 1
-; CHECK-NEXT:    i32.and $push180=, $pop179, $pop332
-; CHECK-NEXT:    i32.const $push331=, 11
-; CHECK-NEXT:    i32.shl $push181=, $pop180, $pop331
-; CHECK-NEXT:    i32.or $push182=, $pop178, $pop181
-; CHECK-NEXT:    i8x16.extract_lane_u $push183=, $2, 12
-; CHECK-NEXT:    i32.const $push330=, 1
-; CHECK-NEXT:    i32.and $push184=, $pop183, $pop330
-; CHECK-NEXT:    i32.const $push329=, 12
-; CHECK-NEXT:    i32.shl $push185=, $pop184, $pop329
-; CHECK-NEXT:    i32.or $push186=, $pop182, $pop185
-; CHECK-NEXT:    i8x16.extract_lane_u $push187=, $2, 13
-; CHECK-NEXT:    i32.const $push328=, 1
-; CHECK-NEXT:    i32.and $push188=, $pop187, $pop328
-; CHECK-NEXT:    i32.const $push327=, 13
-; CHECK-NEXT:    i32.shl $push189=, $pop188, $pop327
-; CHECK-NEXT:    i32.or $push190=, $pop186, $pop189
-; CHECK-NEXT:    i8x16.extract_lane_u $push191=, $2, 14
-; CHECK-NEXT:    i32.const $push326=, 1
-; CHECK-NEXT:    i32.and $push192=, $pop191, $pop326
-; CHECK-NEXT:    i32.const $push325=, 14
-; CHECK-NEXT:    i32.shl $push193=, $pop192, $pop325
-; CHECK-NEXT:    i32.or $push194=, $pop190, $pop193
-; CHECK-NEXT:    i32.or $push258=, $pop257, $pop194
-; CHECK-NEXT:    i8x16.extract_lane_u $push131=, $1, 15
-; CHECK-NEXT:    i8x16.extract_lane_u $push130=, $3, 15
-; CHECK-NEXT:    i32.or $push132=, $pop131, $pop130
-; CHECK-NEXT:    i32.const $push324=, 15
-; CHECK-NEXT:    i32.shl $push134=, $pop132, $pop324
-; CHECK-NEXT:    i8x16.extract_lane_u $push72=, $1, 0
-; CHECK-NEXT:    i32.const $push323=, 1
-; CHECK-NEXT:    i32.and $push73=, $pop72, $pop323
-; CHECK-NEXT:    i8x16.extract_lane_u $push74=, $1, 1
-; CHECK-NEXT:    i32.const $push322=, 1
-; CHECK-NEXT:    i32.and $push75=, $pop74, $pop322
-; CHECK-NEXT:    i32.const $push321=, 1
-; CHECK-NEXT:    i32.shl $push76=, $pop75, $pop321
-; CHECK-NEXT:    i32.or $push77=, $pop73, $pop76
-; CHECK-NEXT:    i8x16.extract_lane_u $push78=, $1, 2
-; CHECK-NEXT:    i32.const $push320=, 1
-; CHECK-NEXT:    i32.and $push79=, $pop78, $pop320
-; CHECK-NEXT:    i32.const $push319=, 2
-; CHECK-NEXT:    i32.shl $push80=, $pop79, $pop319
-; CHECK-NEXT:    i32.or $push81=, $pop77, $pop80
-; CHECK-NEXT:    i8x16.extract_lane_u $push82=, $1, 3
-; CHECK-NEXT:    i32.const $push318=, 1
-; CHECK-NEXT:    i32.and $push83=, $pop82, $pop318
-; CHECK-NEXT:    i32.const $push317=, 3
-; CHECK-NEXT:    i32.shl $push84=, $pop83, $pop317
-; CHECK-NEXT:    i32.or $push85=, $pop81, $pop84
-; CHECK-NEXT:    i8x16.extract_lane_u $push86=, $1, 4
-; CHECK-NEXT:    i32.const $push316=, 1
-; CHECK-NEXT:    i32.and $push87=, $pop86, $pop316
-; CHECK-NEXT:    i32.const $push315=, 4
-; CHECK-NEXT:    i32.shl $push88=, $pop87, $pop315
-; CHECK-NEXT:    i32.or $push89=, $pop85, $pop88
-; CHECK-NEXT:    i8x16.extract_lane_u $push90=, $1, 5
-; CHECK-NEXT:    i32.const $push314=, 1
-; CHECK-NEXT:    i32.and $push91=, $pop90, $pop314
-; CHECK-NEXT:    i32.const $push313=, 5
-; CHECK-NEXT:    i32.shl $push92=, $pop91, $pop313
-; CHECK-NEXT:    i32.or $push93=, $pop89, $pop92
-; CHECK-NEXT:    i8x16.extract_lane_u $push94=, $1, 6
-; CHECK-NEXT:    i32.const $push312=, 1
-; CHECK-NEXT:    i32.and $push95=, $pop94, $pop312
-; CHECK-NEXT:    i32.const $push311=, 6
-; CHECK-NEXT:    i32.shl $push96=, $pop95, $pop311
-; CHECK-NEXT:    i32.or $push97=, $pop93, $pop96
-; CHECK-NEXT:    i8x16.extract_lane_u $push98=, $1, 7
-; CHECK-NEXT:    i32.const $push310=, 1
-; CHECK-NEXT:    i32.and $push99=, $pop98, $pop310
-; CHECK-NEXT:    i32.const $push309=, 7
-; CHECK-NEXT:    i32.shl $push100=, $pop99, $pop309
-; CHECK-NEXT:    i32.or $push101=, $pop97, $pop100
-; CHECK-NEXT:    i8x16.extract_lane_u $push102=, $1, 8
-; CHECK-NEXT:    i32.const $push308=, 1
-; CHECK-NEXT:    i32.and $push103=, $pop102, $pop308
-; CHECK-NEXT:    i32.const $push307=, 8
-; CHECK-NEXT:    i32.shl $push104=, $pop103, $pop307
-; CHECK-NEXT:    i32.or $push105=, $pop101, $pop104
-; CHECK-NEXT:    i8x16.extract_lane_u $push106=, $1, 9
-; CHECK-NEXT:    i32.const $push306=, 1
-; CHECK-NEXT:    i32.and $push107=, $pop106, $pop306
-; CHECK-NEXT:    i32.const $push305=, 9
-; CHECK-NEXT:    i32.shl $push108=, $pop107, $pop305
-; CHECK-NEXT:    i32.or $push109=, $pop105, $pop108
-; CHECK-NEXT:    i8x16.extract_lane_u $push110=, $1, 10
-; CHECK-NEXT:    i32.const $push304=, 1
-; CHECK-NEXT:    i32.and $push111=, $pop110, $pop304
-; CHECK-NEXT:    i32.const $push303=, 10
-; CHECK-NEXT:    i32.shl $push112=, $pop111, $pop303
-; CHECK-NEXT:    i32.or $push113=, $pop109, $pop112
-; CHECK-NEXT:    i8x16.extract_lane_u $push114=, $1, 11
-; CHECK-NEXT:    i32.const $push302=, 1
-; CHECK-NEXT:    i32.and $push115=, $pop114, $pop302
-; CHECK-NEXT:    i32.const $push301=, 11
-; CHECK-NEXT:    i32.shl $push116=, $pop115, $pop301
-; CHECK-NEXT:    i32.or $push117=, $pop113, $pop116
-; CHECK-NEXT:    i8x16.extract_lane_u $push118=, $1, 12
-; CHECK-NEXT:    i32.const $push300=, 1
-; CHECK-NEXT:    i32.and $push119=, $pop118, $pop300
-; CHECK-NEXT:    i32.const $push299=, 12
-; CHECK-NEXT:    i32.shl $push120=, $pop119, $pop299
-; CHECK-NEXT:    i32.or $push121=, $pop117, $pop120
-; CHECK-NEXT:    i8x16.extract_lane_u $push122=, $1, 13
-; CHECK-NEXT:    i32.const $push298=, 1
-; CHECK-NEXT:    i32.and $push123=, $pop122, $pop298
-; CHECK-NEXT:    i32.const $push297=, 13
-; CHECK-NEXT:    i32.shl $push124=, $pop123, $pop297
-; CHECK-NEXT:    i32.or $push125=, $pop121, $pop124
-; CHECK-NEXT:    i8x16.extract_lane_u $push126=, $1, 14
-; CHECK-NEXT:    i32.const $push296=, 1
-; CHECK-NEXT:    i32.and $push127=, $pop126, $pop296
-; CHECK-NEXT:    i32.const $push295=, 14
-; CHECK-NEXT:    i32.shl $push128=, $pop127, $pop295
-; CHECK-NEXT:    i32.or $push129=, $pop125, $pop128
-; CHECK-NEXT:    i32.or $push135=, $pop134, $pop129
-; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $3, 0
-; CHECK-NEXT:    i32.const $push294=, 1
-; CHECK-NEXT:    i32.and $push2=, $pop0, $pop294
-; CHECK-NEXT:    i8x16.extract_lane_u $push3=, $3, 1
-; CHECK-NEXT:    i32.const $push293=, 1
-; CHECK-NEXT:    i32.and $push4=, $pop3, $pop293
-; CHECK-NEXT:    i32.const $push292=, 1
-; CHECK-NEXT:    i32.shl $push5=, $pop4, $pop292
-; CHECK-NEXT:    i32.or $push6=, $pop2, $pop5
-; CHECK-NEXT:    i8x16.extract_lane_u $push7=, $3, 2
-; CHECK-NEXT:    i32.const $push291=, 1
-; CHECK-NEXT:    i32.and $push8=, $pop7, $pop291
-; CHECK-NEXT:    i32.const $push290=, 2
-; CHECK-NEXT:    i32.shl $push10=, $pop8, $pop290
-; CHECK-NEXT:    i32.or $push11=, $pop6, $pop10
-; CHECK-NEXT:    i8x16.extract_lane_u $push12=, $3, 3
-; CHECK-NEXT:    i32.const $push289=, 1
-; CHECK-NEXT:    i32.and $push13=, $pop12, $pop289
-; CHECK-NEXT:    i32.const $push288=, 3
-; CHECK-NEXT:    i32.shl $push15=, $pop13, $pop288
-; CHECK-NEXT:    i32.or $push16=, $pop11, $pop15
-; CHECK-NEXT:    i8x16.extract_lane_u $push17=, $3, 4
-; CHECK-NEXT:    i32.const $push287=, 1
-; CHECK-NEXT:    i32.and $push18=, $pop17, $pop287
-; CHECK-NEXT:    i32.const $push286=, 4
-; CHECK-NEXT:    i32.shl $push20=, $pop18, $pop286
-; CHECK-NEXT:    i32.or $push21=, $pop16, $pop20
-; CHECK-NEXT:    i8x16.extract_lane_u $push22=, $3, 5
-; CHECK-NEXT:    i32.const $push285=, 1
-; CHECK-NEXT:    i32.and $push23=, $pop22, $pop285
-; CHECK-NEXT:    i32.const $push284=, 5
-; CHECK-NEXT:    i32.shl $push25=, $pop23, $pop284
-; CHECK-NEXT:    i32.or $push26=, $pop21, $pop25
-; CHECK-NEXT:    i8x16.extract_lane_u $push27=, $3, 6
-; CHECK-NEXT:    i32.const $push283=, 1
-; CHECK-NEXT:    i32.and $push28=, $pop27, $pop283
-; CHECK-NEXT:    i32.const $push282=, 6
-; CHECK-NEXT:    i32.shl $push30=, $pop28, $pop282
-; CHECK-NEXT:    i32.or $push31=, $pop26, $pop30
-; CHECK-NEXT:    i8x16.extract_lane_u $push32=, $3, 7
-; CHECK-NEXT:    i32.const $push281=, 1
-; CHECK-NEXT:    i32.and $push33=, $pop32, $pop281
-; CHECK-NEXT:    i32.const $push280=, 7
-; CHECK-NEXT:    i32.shl $push35=, $pop33, $pop280
-; CHECK-NEXT:    i32.or $push36=, $pop31, $pop35
-; CHECK-NEXT:    i8x16.extract_lane_u $push37=, $3, 8
-; CHECK-NEXT:    i32.const $push279=, 1
-; CHECK-NEXT:    i32.and $push38=, $pop37, $pop279
-; CHECK-NEXT:    i32.const $push278=, 8
-; CHECK-NEXT:    i32.shl $push40=, $pop38, $pop278
-; CHECK-NEXT:    i32.or $push41=, $pop36, $pop40
-; CHECK-NEXT:    i8x16.extract_lane_u $push42=, $3, 9
-; CHECK-NEXT:    i32.const $push277=, 1
-; CHECK-NEXT:    i32.and $push43=, $pop42, $pop277
-; CHECK-NEXT:    i32.const $push276=, 9
-; CHECK-NEXT:    i32.shl $push45=, $pop43, $pop276
-; CHECK-NEXT:    i32.or $push46=, $pop41, $pop45
-; CHECK-NEXT:    i8x16.extract_lane_u $push47=, $3, 10
-; CHECK-NEXT:    i32.const $push275=, 1
-; CHECK-NEXT:    i32.and $push48=, $pop47, $pop275
-; CHECK-NEXT:    i32.const $push274=, 10
-; CHECK-NEXT:    i32.shl $push50=, $pop48, $pop274
-; CHECK-NEXT:    i32.or $push51=, $pop46, $pop50
-; CHECK-NEXT:    i8x16.extract_lane_u $push52=, $3, 11
-; CHECK-NEXT:    i32.const $push273=, 1
-; CHECK-NEXT:    i32.and $push53=, $pop52, $pop273
-; CHECK-NEXT:    i32.const $push272=, 11
-; CHECK-NEXT:    i32.shl $push55=, $pop53, $pop272
-; CHECK-NEXT:    i32.or $push56=, $pop51, $pop55
-; CHECK-NEXT:    i8x16.extract_lane_u $push57=, $3, 12
-; CHECK-NEXT:    i32.const $push271=, 1
-; CHECK-NEXT:    i32.and $push58=, $pop57, $pop271
-; CHECK-NEXT:    i32.const $push270=, 12
-; CHECK-NEXT:    i32.shl $push60=, $pop58, $pop270
-; CHECK-NEXT:    i32.or $push61=, $pop56, $pop60
-; CHECK-NEXT:    i8x16.extract_lane_u $push62=, $3, 13
-; CHECK-NEXT:    i32.const $push269=, 1
-; CHECK-NEXT:    i32.and $push63=, $pop62, $pop269
-; CHECK-NEXT:    i32.const $push268=, 13
-; CHECK-NEXT:    i32.shl $push65=, $pop63, $pop268
-; CHECK-NEXT:    i32.or $push66=, $pop61, $pop65
-; CHECK-NEXT:    i8x16.extract_lane_u $push67=, $3, 14
-; CHECK-NEXT:    i32.const $push267=, 1
-; CHECK-NEXT:    i32.and $push68=, $pop67, $pop267
-; CHECK-NEXT:    i32.const $push266=, 14
-; CHECK-NEXT:    i32.shl $push70=, $pop68, $pop266
-; CHECK-NEXT:    i32.or $push71=, $pop66, $pop70
-; CHECK-NEXT:    i32.or $push136=, $pop135, $pop71
-; CHECK-NEXT:    i32.or $push259=, $pop258, $pop136
-; CHECK-NEXT:    i32.const $push260=, 65535
-; CHECK-NEXT:    i32.and $push261=, $pop259, $pop260
-; CHECK-NEXT:    i32.const $push262=, 0
-; CHECK-NEXT:    i32.ne $push263=, $pop261, $pop262
-; CHECK-NEXT:    return $pop263
+; CHECK-NEXT:    i32.const $push0=, 7
+; CHECK-NEXT:    i8x16.shl $push4=, $0, $pop0
+; CHECK-NEXT:    i32.const $push22=, 7
+; CHECK-NEXT:    i8x16.shr_s $push5=, $pop4, $pop22
+; CHECK-NEXT:    v128.any_true $push6=, $pop5
+; CHECK-NEXT:    i32.const $push21=, 7
+; CHECK-NEXT:    i8x16.shl $push1=, $1, $pop21
+; CHECK-NEXT:    i32.const $push20=, 7
+; CHECK-NEXT:    i8x16.shr_s $push2=, $pop1, $pop20
+; CHECK-NEXT:    v128.any_true $push3=, $pop2
+; CHECK-NEXT:    i32.or $push7=, $pop6, $pop3
+; CHECK-NEXT:    i32.const $push19=, 7
+; CHECK-NEXT:    i8x16.shl $push8=, $2, $pop19
+; CHECK-NEXT:    i32.const $push18=, 7
+; CHECK-NEXT:    i8x16.shr_s $push9=, $pop8, $pop18
+; CHECK-NEXT:    v128.any_true $push10=, $pop9
+; CHECK-NEXT:    i32.or $push11=, $pop7, $pop10
+; CHECK-NEXT:    i32.const $push17=, 7
+; CHECK-NEXT:    i8x16.shl $push12=, $3, $pop17
+; CHECK-NEXT:    i32.const $push16=, 7
+; CHECK-NEXT:    i8x16.shr_s $push13=, $pop12, $pop16
+; CHECK-NEXT:    v128.any_true $push14=, $pop13
+; CHECK-NEXT:    i32.or $push15=, $pop11, $pop14
+; CHECK-NEXT:    return $pop15
   %bits = trunc <64 x i8> %x to <64 x i1>
   %ret = call i1 @llvm.vector.reduce.or.v64i1(<64 x i1> %bits)
   ret i1 %ret
 }
 
-;; TODO: Improve this codegen.
 define i1 @test_all_v64i8(<64 x i8> %x) {
 ; CHECK-LABEL: test_all_v64i8:
 ; CHECK:         .functype test_all_v64i8 (v128, v128, v128, v128) -> (i32)
 ; CHECK-NEXT:  # %bb.0:
-; CHECK-NEXT:    global.get $push287=, __stack_pointer
-; CHECK-NEXT:    i32.const $push288=, 16
-; CHECK-NEXT:    i32.sub $drop=, $pop287, $pop288
-; CHECK-NEXT:    i8x16.extract_lane_u $push220=, $2, 0
-; CHECK-NEXT:    i32.const $push1=, 1
-; CHECK-NEXT:    i32.and $push221=, $pop220, $pop1
-; CHECK-NEXT:    i8x16.extract_lane_u $push222=, $2, 1
-; CHECK-NEXT:    i32.const $push380=, 1
-; CHECK-NEXT:    i32.and $push223=, $pop222, $pop380
-; CHECK-NEXT:    i32.const $push379=, 1
-; CHECK-NEXT:    i32.shl $push224=, $pop223, $pop379
-; CHECK-NEXT:    i32.or $push225=, $pop221, $pop224
-; CHECK-NEXT:    i8x16.extract_lane_u $push226=, $2, 2
-; CHECK-NEXT:    i32.const $push378=, 1
-; CHECK-NEXT:    i32.and $push227=, $pop226, $pop378
-; CHECK-NEXT:    i32.const $push87=, 2
-; CHECK-NEXT:    i32.shl $push228=, $pop227, $pop87
-; CHECK-NEXT:    i32.or $push229=, $pop225, $pop228
-; CHECK-NEXT:    i8x16.extract_lane_u $push230=, $2, 3
-; CHECK-NEXT:    i32.const $push377=, 1
-; CHECK-NEXT:    i32.and $push231=, $pop230, $pop377
-; CHECK-NEXT:    i32.const $push92=, 3
-; CHECK-NEXT:    i32.shl $push232=, $pop231, $pop92
-; CHECK-NEXT:    i32.or $push233=, $pop229, $pop232
-; CHECK-NEXT:    i8x16.extract_lane_u $push234=, $2, 4
-; CHECK-NEXT:    i32.const $push376=, 1
-; CHECK-NEXT:    i32.and $push235=, $pop234, $pop376
-; CHECK-NEXT:    i32.const $push97=, 4
-; CHECK-NEXT:    i32.shl $push236=, $pop235, $pop97
-; CHECK-NEXT:    i32.or $push237=, $pop233, $pop236
-; CHECK-NEXT:    i8x16.extract_lane_u $push238=, $2, 5
-; CHECK-NEXT:    i32.const $push375=, 1
-; CHECK-NEXT:    i32.and $push239=, $pop238, $pop375
-; CHECK-NEXT:    i32.const $push102=, 5
-; CHECK-NEXT:    i32.shl $push240=, $pop239, $pop102
-; CHECK-NEXT:    i32.or $push241=, $pop237, $pop240
-; CHECK-NEXT:    i8x16.extract_lane_u $push242=, $2, 6
-; CHECK-NEXT:    i32.const $push374=, 1
-; CHECK-NEXT:    i32.and $push243=, $pop242, $pop374
-; CHECK-NEXT:    i32.const $push107=, 6
-; CHECK-NEXT:    i32.shl $push244=, $pop243, $pop107
-; CHECK-NEXT:    i32.or $push245=, $pop241, $pop244
-; CHECK-NEXT:    i8x16.extract_lane_u $push246=, $2, 7
-; CHECK-NEXT:    i32.const $push373=, 1
-; CHECK-NEXT:    i32.and $push247=, $pop246, $pop373
-; CHECK-NEXT:    i32.const $push112=, 7
-; CHECK-NEXT:    i32.shl $push248=, $pop247, $pop112
-; CHECK-NEXT:    i32.or $push249=, $pop245, $pop248
-; CHECK-NEXT:    i8x16.extract_lane_u $push250=, $2, 8
-; CHECK-NEXT:    i32.const $push372=, 1
-; CHECK-NEXT:    i32.and $push251=, $pop250, $pop372
-; CHECK-NEXT:    i32.const $push117=, 8
-; CHECK-NEXT:    i32.shl $push252=, $pop251, $pop117
-; CHECK-NEXT:    i32.or $push253=, $pop249, $pop252
-; CHECK-NEXT:    i8x16.extract_lane_u $push254=, $2, 9
-; CHECK-NEXT:    i32.const $push371=, 1
-; CHECK-NEXT:    i32.and $push255=, $pop254, $pop371
-; CHECK-NEXT:    i32.const $push122=, 9
-; CHECK-NEXT:    i32.shl $push256=, $pop255, $pop122
-; CHECK-NEXT:    i32.or $push257=, $pop253, $pop256
-; CHECK-NEXT:    i8x16.extract_lane_u $push258=, $2, 10
-; CHECK-NEXT:    i32.const $push370=, 1
-; CHECK-NEXT:    i32.and $push259=, $pop258, $pop370
-; CHECK-NEXT:    i32.const $push127=, 10
-; CHECK-NEXT:    i32.shl $push260=, $pop259, $pop127
-; CHECK-NEXT:    i32.or $push261=, $pop257, $pop260
-; CHECK-NEXT:    i8x16.extract_lane_u $push262=, $2, 11
-; CHECK-NEXT:    i32.const $push369=, 1
-; CHECK-NEXT:    i32.and $push263=, $pop262, $pop369
-; CHECK-NEXT:    i32.const $push132=, 11
-; CHECK-NEXT:    i32.shl $push264=, $pop263, $pop132
-; CHECK-NEXT:    i32.or $push265=, $pop261, $pop264
-; CHECK-NEXT:    i8x16.extract_lane_u $push266=, $2, 12
-; CHECK-NEXT:    i32.const $push368=, 1
-; CHECK-NEXT:    i32.and $push267=, $pop266, $pop368
-; CHECK-NEXT:    i32.const $push137=, 12
-; CHECK-NEXT:    i32.shl $push268=, $pop267, $pop137
-; CHECK-NEXT:    i32.or $push269=, $pop265, $pop268
-; CHECK-NEXT:    i8x16.extract_lane_u $push270=, $2, 13
-; CHECK-NEXT:    i32.const $push367=, 1
-; CHECK-NEXT:    i32.and $push271=, $pop270, $pop367
-; CHECK-NEXT:    i32.const $push142=, 13
-; CHECK-NEXT:    i32.shl $push272=, $pop271, $pop142
-; CHECK-NEXT:    i32.or $push273=, $pop269, $pop272
-; CHECK-NEXT:    i8x16.extract_lane_u $push274=, $2, 14
-; CHECK-NEXT:    i32.const $push366=, 1
-; CHECK-NEXT:    i32.and $push275=, $pop274, $pop366
-; CHECK-NEXT:    i32.const $push147=, 14
-; CHECK-NEXT:    i32.shl $push276=, $pop275, $pop147
-; CHECK-NEXT:    i32.or $push277=, $pop273, $pop276
-; CHECK-NEXT:    i8x16.extract_lane_u $push278=, $2, 15
-; CHECK-NEXT:    i32.const $push151=, 15
-; CHECK-NEXT:    i32.shl $push279=, $pop278, $pop151
-; CHECK-NEXT:    i32.or $push280=, $pop277, $pop279
-; CHECK-NEXT:    i32.const $push154=, 65535
-; CHECK-NEXT:    i32.and $push281=, $pop280, $pop154
-; CHECK-NEXT:    i8x16.extract_lane_u $push217=, $3, 15
-; CHECK-NEXT:    i32.const $push76=, 31
-; CHECK-NEXT:    i32.shl $push218=, $pop217, $pop76
-; CHECK-NEXT:    i8x16.extract_lane_u $push213=, $3, 14
-; CHECK-NEXT:    i32.const $push365=, 1
-; CHECK-NEXT:    i32.and $push214=, $pop213, $pop365
-; CHECK-NEXT:    i32.const $push72=, 30
-; CHECK-NEXT:    i32.shl $push215=, $pop214, $pop72
-; CHECK-NEXT:    i8x16.extract_lane_u $push209=, $3, 13
-; CHECK-NEXT:    i32.const $push364=, 1
-; CHECK-NEXT:    i32.and $push210=, $pop209, $pop364
-; CHECK-NEXT:    i32.const $push67=, 29
-; CHECK-NEXT:    i32.shl $push211=, $pop210, $pop67
-; CHECK-NEXT:    i8x16.extract_lane_u $push205=, $3, 12
-; CHECK-NEXT:    i32.const $push363=, 1
-; CHECK-NEXT:    i32.and $push206=, $pop205, $pop363
-; CHECK-NEXT:    i32.const $push62=, 28
-; CHECK-NEXT:    i32.shl $push207=, $pop206, $pop62
-; CHECK-NEXT:    i8x16.extract_lane_u $push201=, $3, 11
-; CHECK-NEXT:    i32.const $push362=, 1
-; CHECK-NEXT:    i32.and $push202=, $pop201, $pop362
-; CHECK-NEXT:    i32.const $push57=, 27
-; CHECK-NEXT:    i32.shl $push203=, $pop202, $pop57
-; CHECK-NEXT:    i8x16.extract_lane_u $push197=, $3, 10
-; CHECK-NEXT:    i32.const $push361=, 1
-; CHECK-NEXT:    i32.and $push198=, $pop197, $pop361
-; CHECK-NEXT:    i32.const $push52=, 26
-; CHECK-NEXT:    i32.shl $push199=, $pop198, $pop52
-; CHECK-NEXT:    i8x16.extract_lane_u $push193=, $3, 9
-; CHECK-NEXT:    i32.const $push360=, 1
-; CHECK-NEXT:    i32.and $push194=, $pop193, $pop360
-; CHECK-NEXT:    i32.const $push47=, 25
-; CHECK-NEXT:    i32.shl $push195=, $pop194, $pop47
-; CHECK-NEXT:    i8x16.extract_lane_u $push189=, $3, 8
-; CHECK-NEXT:    i32.const $push359=, 1
-; CHECK-NEXT:    i32.and $push190=, $pop189, $pop359
-; CHECK-NEXT:    i32.const $push42=, 24
-; CHECK-NEXT:    i32.shl $push191=, $pop190, $pop42
-; CHECK-NEXT:    i8x16.extract_lane_u $push185=, $3, 7
-; CHECK-NEXT:    i32.const $push358=, 1
-; CHECK-NEXT:    i32.and $push186=, $pop185, $pop358
-; CHECK-NEXT:    i32.const $push37=, 23
-; CHECK-NEXT:    i32.shl $push187=, $pop186, $pop37
-; CHECK-NEXT:    i8x16.extract_lane_u $push181=, $3, 6
-; CHECK-NEXT:    i32.const $push357=, 1
-; CHECK-NEXT:    i32.and $push182=, $pop181, $pop357
-; CHECK-NEXT:    i32.const $push32=, 22
-; CHECK-NEXT:    i32.shl $push183=, $pop182, $pop32
-; CHECK-NEXT:    i8x16.extract_lane_u $push177=, $3, 5
-; CHECK-NEXT:    i32.const $push356=, 1
-; CHECK-NEXT:    i32.and $push178=, $pop177, $pop356
-; CHECK-NEXT:    i32.const $push27=, 21
-; CHECK-NEXT:    i32.shl $push179=, $pop178, $pop27
-; CHECK-NEXT:    i8x16.extract_lane_u $push173=, $3, 4
-; CHECK-NEXT:    i32.const $push355=, 1
-; CHECK-NEXT:    i32.and $push174=, $pop173, $pop355
-; CHECK-NEXT:    i32.const $push22=, 20
-; CHECK-NEXT:    i32.shl $push175=, $pop174, $pop22
-; CHECK-NEXT:    i8x16.extract_lane_u $push169=, $3, 3
-; CHECK-NEXT:    i32.const $push354=, 1
-; CHECK-NEXT:    i32.and $push170=, $pop169, $pop354
-; CHECK-NEXT:    i32.const $push17=, 19
-; CHECK-NEXT:    i32.shl $push171=, $pop170, $pop17
-; CHECK-NEXT:    i8x16.extract_lane_u $push165=, $3, 2
-; CHECK-NEXT:    i32.const $push353=, 1
-; CHECK-NEXT:    i32.and $push166=, $pop165, $pop353
-; CHECK-NEXT:    i32.const $push12=, 18
-; CHECK-NEXT:    i32.shl $push167=, $pop166, $pop12
-; CHECK-NEXT:    i8x16.extract_lane_u $push161=, $3, 1
-; CHECK-NEXT:    i32.const $push352=, 1
-; CHECK-NEXT:    i32.and $push162=, $pop161, $pop352
-; CHECK-NEXT:    i32.const $push7=, 17
-; CHECK-NEXT:    i32.shl $push163=, $pop162, $pop7
-; CHECK-NEXT:    i8x16.extract_lane_u $push158=, $3, 0
-; CHECK-NEXT:    i32.const $push351=, 1
-; CHECK-NEXT:    i32.and $push159=, $pop158, $pop351
-; CHECK-NEXT:    i32.const $push3=, 16
-; CHECK-NEXT:    i32.shl $push160=, $pop159, $pop3
-; CHECK-NEXT:    i32.or $push164=, $pop163, $pop160
-; CHECK-NEXT:    i32.or $push168=, $pop167, $pop164
-; CHECK-NEXT:    i32.or $push172=, $pop171, $pop168
-; CHECK-NEXT:    i32.or $push176=, $pop175, $pop172
-; CHECK-NEXT:    i32.or $push180=, $pop179, $pop176
-; CHECK-NEXT:    i32.or $push184=, $pop183, $pop180
-; CHECK-NEXT:    i32.or $push188=, $pop187, $pop184
-; CHECK-NEXT:    i32.or $push192=, $pop191, $pop188
-; CHECK-NEXT:    i32.or $push196=, $pop195, $pop192
-; CHECK-NEXT:    i32.or $push200=, $pop199, $pop196
-; CHECK-NEXT:    i32.or $push204=, $pop203, $pop200
-; CHECK-NEXT:    i32.or $push208=, $pop207, $pop204
-; CHECK-NEXT:    i32.or $push212=, $pop211, $pop208
-; CHECK-NEXT:    i32.or $push216=, $pop215, $pop212
-; CHECK-NEXT:    i32.or $push219=, $pop218, $pop216
-; CHECK-NEXT:    i32.or $push282=, $pop281, $pop219
-; CHECK-NEXT:    i64.extend_i32_u $push283=, $pop282
-; CHECK-NEXT:    i8x16.extract_lane_u $push79=, $0, 0
-; CHECK-NEXT:    i32.const $push350=, 1
-; CHECK-NEXT:    i32.and $push80=, $pop79, $pop350
-; CHECK-NEXT:    i8x16.extract_lane_u $push81=, $0, 1
-; CHECK-NEXT:    i32.const $push349=, 1
-; CHECK-NEXT:    i32.and $push82=, $pop81, $pop349
-; CHECK-NEXT:    i32.const $push348=, 1
-; CHECK-NEXT:    i32.shl $push83=, $pop82, $pop348
-; CHECK-NEXT:    i32.or $push84=, $pop80, $pop83
-; CHECK-NEXT:    i8x16.extract_lane_u $push85=, $0, 2
-; CHECK-NEXT:    i32.const $push347=, 1
-; CHECK-NEXT:    i32.and $push86=, $pop85, $pop347
-; CHECK-NEXT:    i32.const $push346=, 2
-; CHECK-NEXT:    i32.shl $push88=, $pop86, $pop346
-; CHECK-NEXT:    i32.or $push89=, $pop84, $pop88
-; CHECK-NEXT:    i8x16.extract_lane_u $push90=, $0, 3
-; CHECK-NEXT:    i32.const $push345=, 1
-; CHECK-NEXT:    i32.and $push91=, $pop90, $pop345
-; CHECK-NEXT:    i32.const $push344=, 3
-; CHECK-NEXT:    i32.shl $push93=, $pop91, $pop344
-; CHECK-NEXT:    i32.or $push94=, $pop89, $pop93
-; CHECK-NEXT:    i8x16.extract_lane_u $push95=, $0, 4
-; CHECK-NEXT:    i32.const $push343=, 1
-; CHECK-NEXT:    i32.and $push96=, $pop95, $pop343
-; CHECK-NEXT:    i32.const $push342=, 4
-; CHECK-NEXT:    i32.shl $push98=, $pop96, $pop342
-; CHECK-NEXT:    i32.or $push99=, $pop94, $pop98
-; CHECK-NEXT:    i8x16.extract_lane_u $push100=, $0, 5
-; CHECK-NEXT:    i32.const $push341=, 1
-; CHECK-NEXT:    i32.and $push101=, $pop100, $pop341
-; CHECK-NEXT:    i32.const $push340=, 5
-; CHECK-NEXT:    i32.shl $push103=, $pop101, $pop340
-; CHECK-NEXT:    i32.or $push104=, $pop99, $pop103
-; CHECK-NEXT:    i8x16.extract_lane_u $push105=, $0, 6
-; CHECK-NEXT:    i32.const $push339=, 1
-; CHECK-NEXT:    i32.and $push106=, $pop105, $pop339
-; CHECK-NEXT:    i32.const $push338=, 6
-; CHECK-NEXT:    i32.shl $push108=, $pop106, $pop338
-; CHECK-NEXT:    i32.or $push109=, $pop104, $pop108
-; CHECK-NEXT:    i8x16.extract_lane_u $push110=, $0, 7
-; CHECK-NEXT:    i32.const $push337=, 1
-; CHECK-NEXT:    i32.and $push111=, $pop110, $pop337
-; CHECK-NEXT:    i32.const $push336=, 7
-; CHECK-NEXT:    i32.shl $push113=, $pop111, $pop336
-; CHECK-NEXT:    i32.or $push114=, $pop109, $pop113
-; CHECK-NEXT:    i8x16.extract_lane_u $push115=, $0, 8
-; CHECK-NEXT:    i32.const $push335=, 1
-; CHECK-NEXT:    i32.and $push116=, $pop115, $pop335
-; CHECK-NEXT:    i32.const $push334=, 8
-; CHECK-NEXT:    i32.shl $push118=, $pop116, $pop334
-; CHECK-NEXT:    i32.or $push119=, $pop114, $pop118
-; CHECK-NEXT:    i8x16.extract_lane_u $push120=, $0, 9
-; CHECK-NEXT:    i32.const $push333=, 1
-; CHECK-NEXT:    i32.and $push121=, $pop120, $pop333
-; CHECK-NEXT:    i32.const $push332=, 9
-; CHECK-NEXT:    i32.shl $push123=, $pop121, $pop332
-; CHECK-NEXT:    i32.or $push124=, $pop119, $pop123
-; CHECK-NEXT:    i8x16.extract_lane_u $push125=, $0, 10
-; CHECK-NEXT:    i32.const $push331=, 1
-; CHECK-NEXT:    i32.and $push126=, $pop125, $pop331
-; CHECK-NEXT:    i32.const $push330=, 10
-; CHECK-NEXT:    i32.shl $push128=, $pop126, $pop330
-; CHECK-NEXT:    i32.or $push129=, $pop124, $pop128
-; CHECK-NEXT:    i8x16.extract_lane_u $push130=, $0, 11
-; CHECK-NEXT:    i32.const $push329=, 1
-; CHECK-NEXT:    i32.and $push131=, $pop130, $pop329
-; CHECK-NEXT:    i32.const $push328=, 11
-; CHECK-NEXT:    i32.shl $push133=, $pop131, $pop328
-; CHECK-NEXT:    i32.or $push134=, $pop129, $pop133
-; CHECK-NEXT:    i8x16.extract_lane_u $push135=, $0, 12
-; CHECK-NEXT:    i32.const $push327=, 1
-; CHECK-NEXT:    i32.and $push136=, $pop135, $pop327
-; CHECK-NEXT:    i32.const $push326=, 12
-; CHECK-NEXT:    i32.shl $push138=, $pop136, $pop326
-; CHECK-NEXT:    i32.or $push139=, $pop134, $pop138
-; CHECK-NEXT:    i8x16.extract_lane_u $push140=, $0, 13
-; CHECK-NEXT:    i32.const $push325=, 1
-; CHECK-NEXT:    i32.and $push141=, $pop140, $pop325
-; CHECK-NEXT:    i32.const $push324=, 13
-; CHECK-NEXT:    i32.shl $push143=, $pop141, $pop324
-; CHECK-NEXT:    i32.or $push144=, $pop139, $pop143
-; CHECK-NEXT:    i8x16.extract_lane_u $push145=, $0, 14
-; CHECK-NEXT:    i32.const $push323=, 1
-; CHECK-NEXT:    i32.and $push146=, $pop145, $pop323
-; CHECK-NEXT:    i32.const $push322=, 14
-; CHECK-NEXT:    i32.shl $push148=, $pop146, $pop322
-; CHECK-NEXT:    i32.or $push149=, $pop144, $pop148
-; CHECK-NEXT:    i8x16.extract_lane_u $push150=, $0, 15
-; CHECK-NEXT:    i32.const $push321=, 15
-; CHECK-NEXT:    i32.shl $push152=, $pop150, $pop321
-; CHECK-NEXT:    i32.or $push153=, $pop149, $pop152
-; CHECK-NEXT:    i32.const $push320=, 65535
-; CHECK-NEXT:    i32.and $push155=, $pop153, $pop320
-; CHECK-NEXT:    i8x16.extract_lane_u $push75=, $1, 15
-; CHECK-NEXT:    i32.const $push319=, 31
-; CHECK-NEXT:    i32.shl $push77=, $pop75, $pop319
-; CHECK-NEXT:    i8x16.extract_lane_u $push70=, $1, 14
-; CHECK-NEXT:    i32.const $push318=, 1
-; CHECK-NEXT:    i32.and $push71=, $pop70, $pop318
-; CHECK-NEXT:    i32.const $push317=, 30
-; CHECK-NEXT:    i32.shl $push73=, $pop71, $pop317
-; CHECK-NEXT:    i8x16.extract_lane_u $push65=, $1, 13
-; CHECK-NEXT:    i32.const $push316=, 1
-; CHECK-NEXT:    i32.and $push66=, $pop65, $pop316
-; CHECK-NEXT:    i32.const $push315=, 29
-; CHECK-NEXT:    i32.shl $push68=, $pop66, $pop315
-; CHECK-NEXT:    i8x16.extract_lane_u $push60=, $1, 12
-; CHECK-NEXT:    i32.const $push314=, 1
-; CHECK-NEXT:    i32.and $push61=, $pop60, $pop314
-; CHECK-NEXT:    i32.const $push313=, 28
-; CHECK-NEXT:    i32.shl $push63=, $pop61, $pop313
-; CHECK-NEXT:    i8x16.extract_lane_u $push55=, $1, 11
-; CHECK-NEXT:    i32.const $push312=, 1
-; CHECK-NEXT:    i32.and $push56=, $pop55, $pop312
-; CHECK-NEXT:    i32.const $push311=, 27
-; CHECK-NEXT:    i32.shl $push58=, $pop56, $pop311
-; CHECK-NEXT:    i8x16.extract_lane_u $push50=, $1, 10
-; CHECK-NEXT:    i32.const $push310=, 1
-; CHECK-NEXT:    i32.and $push51=, $pop50, $pop310
-; CHECK-NEXT:    i32.const $push309=, 26
-; CHECK-NEXT:    i32.shl $push53=, $pop51, $pop309
-; CHECK-NEXT:    i8x16.extract_lane_u $push45=, $1, 9
-; CHECK-NEXT:    i32.const $push308=, 1
-; CHECK-NEXT:    i32.and $push46=, $pop45, $pop308
-; CHECK-NEXT:    i32.const $push307=, 25
-; CHECK-NEXT:    i32.shl $push48=, $pop46, $pop307
-; CHECK-NEXT:    i8x16.extract_lane_u $push40=, $1, 8
-; CHECK-NEXT:    i32.const $push306=, 1
-; CHECK-NEXT:    i32.and $push41=, $pop40, $pop306
-; CHECK-NEXT:    i32.const $push305=, 24
-; CHECK-NEXT:    i32.shl $push43=, $pop41, $pop305
-; CHECK-NEXT:    i8x16.extract_lane_u $push35=, $1, 7
-; CHECK-NEXT:    i32.const $push304=, 1
-; CHECK-NEXT:    i32.and $push36=, $pop35, $pop304
-; CHECK-NEXT:    i32.const $push303=, 23
-; CHECK-NEXT:    i32.shl $push38=, $pop36, $pop303
-; CHECK-NEXT:    i8x16.extract_lane_u $push30=, $1, 6
-; CHECK-NEXT:    i32.const $push302=, 1
-; CHECK-NEXT:    i32.and $push31=, $pop30, $pop302
-; CHECK-NEXT:    i32.const $push301=, 22
-; CHECK-NEXT:    i32.shl $push33=, $pop31, $pop301
-; CHECK-NEXT:    i8x16.extract_lane_u $push25=, $1, 5
-; CHECK-NEXT:    i32.const $push300=, 1
-; CHECK-NEXT:    i32.and $push26=, $pop25, $pop300
-; CHECK-NEXT:    i32.const $push299=, 21
-; CHECK-NEXT:    i32.shl $push28=, $pop26, $pop299
-; CHECK-NEXT:    i8x16.extract_lane_u $push20=, $1, 4
-; CHECK-NEXT:    i32.const $push298=, 1
-; CHECK-NEXT:    i32.and $push21=, $pop20, $pop298
-; CHECK-NEXT:    i32.const $push297=, 20
-; CHECK-NEXT:    i32.shl $push23=, $pop21, $pop297
-; CHECK-NEXT:    i8x16.extract_lane_u $push15=, $1, 3
-; CHECK-NEXT:    i32.const $push296=, 1
-; CHECK-NEXT:    i32.and $push16=, $pop15, $pop296
-; CHECK-NEXT:    i32.const $push295=, 19
-; CHECK-NEXT:    i32.shl $push18=, $pop16, $pop295
-; CHECK-NEXT:    i8x16.extract_lane_u $push10=, $1, 2
-; CHECK-NEXT:    i32.const $push294=, 1
-; CHECK-NEXT:    i32.and $push11=, $pop10, $pop294
-; CHECK-NEXT:    i32.const $push293=, 18
-; CHECK-NEXT:    i32.shl $push13=, $pop11, $pop293
-; CHECK-NEXT:    i8x16.extract_lane_u $push5=, $1, 1
-; CHECK-NEXT:    i32.const $push292=, 1
-; CHECK-NEXT:    i32.and $push6=, $pop5, $pop292
-; CHECK-NEXT:    i32.const $push291=, 17
-; CHECK-NEXT:    i32.shl $push8=, $pop6, $pop291
-; CHECK-NEXT:    i8x16.extract_lane_u $push0=, $1, 0
-; CHECK-NEXT:    i32.const $push290=, 1
-; CHECK-NEXT:    i32.and $push2=, $pop0, $pop290
-; CHECK-NEXT:    i32.const $push289=, 16
-; CHECK-NEXT:    i32.shl $push4=, $pop2, $pop289
-; CHECK-NEXT:    i32.or $push9=, $pop8, $pop4
-; CHECK-NEXT:    i32.or $push14=, $pop13, $pop9
-; CHECK-NEXT:    i32.or $push19=, $pop18, $pop14
-; CHECK-NEXT:    i32.or $push24=, $pop23, $pop19
-; CHECK-NEXT:    i32.or $push29=, $pop28, $pop24
-; CHECK-NEXT:    i32.or $push34=, $pop33, $pop29
-; CHECK-NEXT:    i32.or $push39=, $pop38, $pop34
-; CHECK-NEXT:    i32.or $push44=, $pop43, $pop39
-; CHECK-NEXT:    i32.or $push49=, $pop48, $pop44
-; CHECK-NEXT:    i32.or $push54=, $pop53, $pop49
-; CHECK-NEXT:    i32.or $push59=, $pop58, $pop54
-; CHECK-NEXT:    i32.or $push64=, $pop63, $pop59
-; CHECK-NEXT:    i32.or $push69=, $pop68, $pop64
-; CHECK-NEXT:    i32.or $push74=, $pop73, $pop69
-; CHECK-NEXT:    i32.or $push78=, $pop77, $pop74
-; CHECK-NEXT:    i32.or $push156=, $pop155, $pop78
-; CHECK-NEXT:    i64.extend_i32_u $push157=, $pop156
-; CHECK-NEXT:    i64.and $push284=, $pop283, $pop157
-; CHECK-NEXT:    i64.const $push285=, 4294967295
-; CHECK-NEXT:    i64.eq $push286=, $pop284, $pop285
-; CHECK-NEXT:    return $pop286
+; CHECK-NEXT:    i32.const $push0=, 7
+; CHECK-NEXT:    i8x16.shl $push4=, $0, $pop0
+; CHECK-NEXT:    i32.const $push22=, 7
+; CHECK-NEXT:    i8x16.shr_s $push5=, $pop4, $pop22
+; CHECK-NEXT:    i8x16.all_true $push6=, $pop5
+; CHECK-NEXT:    i32.const $push21=, 7
+; CHECK-NEXT:    i8x16.shl $push1=, $1, $pop21
+; CHECK-NEXT:    i32.const $push20=, 7
+; CHECK-NEXT:    i8x16.shr_s $push2=, $pop1, $pop20
+; CHECK-NEXT:    i8x16.all_true $push3=, $pop2
+; CHECK-NEXT:    i32.and $push7=, $pop6, $pop3
+; CHECK-NEXT:    i32.const $push19=, 7
+; CHECK-NEXT:    i8x16.shl $push8=, $2, $pop19
+; CHECK-NEXT:    i32.const $push18=, 7
+; CHECK-NEXT:    i8x16.shr_s $push9=, $pop8, $pop18
+; CHECK-NEXT:    i8x16.all_true $push10=, $pop9
+; CHECK-NEXT:    i32.and $push11=, $pop7, $pop10
+; CHECK-NEXT:    i32.const $push17=, 7
+; CHECK-NEXT:    i8x16.shl $push12=, $3, $pop17
+; CHECK-NEXT:    i32.const $push16=, 7
+; CHECK-NEXT:    i8x16.shr_s $push13=, $pop12, $pop16
+; CHECK-NEXT:    i8x16.all_true $push14=, $pop13
+; CHECK-NEXT:    i32.and $push15=, $pop11, $pop14
+; CHECK-NEXT:    return $pop15
   %bits = trunc <64 x i8> %x to <64 x i1>
   %ret = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %bits)
   ret i1 %ret



More information about the llvm-commits mailing list